From 57586e63b7f7f1f91c3b5e41483bbbb6b2ee04d7 Mon Sep 17 00:00:00 2001 From: Kernel Build Daemon Date: Nov 25 2022 09:11:42 +0000 Subject: Merge branch 'SLE15-SP5' into SLE15-SP5-AZURE --- diff --git a/blacklist.conf b/blacklist.conf index b0f2130..80029be 100644 --- a/blacklist.conf +++ b/blacklist.conf @@ -167,7 +167,6 @@ a82fa1213d12a708fde219f97ea3e76569694fed # usb: reverting the above in stable-5. f5e3b7f0f0b78514d68c44e7909ff2547c51bbb7 # usb: reverting the above in stable-5.15.1 b7a0a792f864583207c593b50fd1b752ed89f4c1 # xhci: reverted by stable-5.14.17 0979b923ff3f963005b69ce44bc82db183c30e25 # xhci: reverting the above in stable-5.14.17 -868c250bb4639531ff33b2d879fbef39c1d9ed39 # not needed for our configs be4c096e6ba7728f4a1ead1de820d75436aedbd9 # arm64:dts:qcom: not applicable 5d1ceb3969b6b2e47e2df6d17790a7c5a20fcbb4 # x86: reverted by stable-5.14.20 b51c1a592faaf114b33f56f25aec757d21a826e0 # x86: reverting the above in stable-5.4.20 @@ -192,7 +191,6 @@ a9cdc1c5e3700a5200e5ca1f90b6958b6483845b # Checked into perf userspace package 218848835699879ed6260ec49bbb22e9e7839017 # went in through stable 1cbf731ef3a17b3dd4c22ed0c634ac126d1a4876 # The docs that this patch removes never got backported 3e2a56e6f639492311e0a8533f0a7aed60816308 # optimization only -a672b2e36a648afb04ad3bda93b6bda947a479a5 # we lack type changes needed to make bpf_ret composable, not serious enough to justify backport 5a897531e00243cebbcc4dbe4ab06cd559ccf53f # added to perf userspace package 3d1d57debee2d342a47615707588b96658fabb85 # added to perf userspace package 3606c0e1a1050d397ad759a62607e419fd8b0ccb # added to perf userspace package @@ -234,7 +232,6 @@ dabe729dddca550446e9cc118c96d1f91703345b # Only code & comment cleanup 057178cf518e699695a4b614a7a08c350b1fdcfd # Only comment fix 538f4f022a4612f969d5324ee227403c9f8b1d72 # Only adds comments 1241ebeca3f94b417751cb3ff62454cefdac75bc # Needs also 8306a5f56305 ("iomap: Add iomap_invalidate_folio") we don't have -480d42dc001bbfe953825a92073012fcd5a99161 # Bug introduced with edb0872f44ec ("block: move the bdi from the request_queue to the gendisk") bf9727a27442a50c75b7d99a5088330c578b2a42 # misattributed. 
Introduced in 680a2ead741ad9b479a53adf154ed5eee74d2b9a, which we do not have
460a79e18842caca6fa0c415de4a3ac1e671ac50 # not a real bug, mostly refactoring
91b96f0008a2d66d76b525556e4818f5a4a089e4 # not applicable: drm/i915: Drop all references to DRM IRQ midlayer
@@ -342,6 +339,8 @@ cfd3a9be0ac423be41afcc7a07d708056bf097a8 # selftest:vm: reverting the above
504627ee4cf4a2d42cba7ce156d423299c06a618 # net: wwan: already applied
d68c2e1d19c540464ad12a8a11bdd88eedcaf3dc # octeontx2-nicvf: a76053707dbf is not applied
d8d83d8ab0a453e17e68b3a3bed1f940c34b8646 # lib/crypto: blake2s: breaks KABI
+d11219ad53dcf61ced53ca60fe0c4a8d34393e6c # reverted by c653c591789b3acfa4bf6ae45d5af4f330e50a91
+c653c591789b3acfa4bf6ae45d5af4f330e50a91 # reverts d11219ad53dcf61ced53ca60fe0c4a8d34393e6c
c51ba246cb172c9e947dc6fb8868a1eaf0b2a913 # cleanup with a risk of regressions
14c174633f349cb41ea90c2c0aaddac157012f74 # tracepoints removal cleanup, not a bug
d41b60359ffb24a39c93ea1f4bffaafd651118c3 # Documentation fix
@@ -422,3 +421,28 @@ ccf16413e520164eb718cf8b22a30438da80ff23 # The fixing issue doesn't exist
e030759a1ddcbf61d42b6e996bfeb675e0032d8b # other dependent patch breaks KABI
55749769fe608fa3f4a075e42e89d237c8e37637 # Xen guests under KVM not supported
fcb732d8f8cf6084f8480015ad41d25fb023a4dd # Xen guests under KVM not supported
+6e4d56db30a5feb83ba233a68841ba79483e7731 # needs extensive intrusive prerequisites in TypeC
+01e16cb67cce68afaeb9c7bed72299036dbb0bc1 # duplicate of 7d0c009043f6a970f62dbf5aecda9f8c3ccafcff
+eabd9a3807e17e211690e6c40f1405b427b64c48 # breaks kABI in an unfixable manner
+758babb511d883cd2aa784d48a362d92119ade99 # duplicate of 65a3e6c8d3f7c346813a05f3d76fc46b640d76d6
+9cfebda442f73a5810d03c635645193634ba85e7 # duplicate of fe4326c8d18dc8a54affdc9ab269ad92dafef659
+07358194badf73e267289b40b761f5dc56928eab # breaks kABI in an unfixable manner
+ee8348496c77e3737d0a6cda307a521f2cff954f # no ppc 32s support
+4f1d038b5ea1b45d8265a5407712f975b600bb94 # no ppc 4xx support
+bba496656a73fc1d1330b49c7f82843836e9feb1 # no ppc 32bit support
+0d375d610fa96524e2ee2b46830a46a7bfa92a9f # no ppc fsl support
+d37823c3528e5e0705fc7746bcbc2afffb619259 # no ppc 32s support
+9bb162fa26ed76031ed0e7dbc77ccea0bf977758 # no ppc 32s support
+5ebb74749202a25da4b3cc2eb15470225a05527c # no ppc fsl support
+cb7356986db020c96f37532042fdae6706e81df7 # no pc3 support
+2863dd2db23e0407f6c50b8ba5c0e55abef894f1 # no ppc 32bit support
+fcee96924ba1596ca80a6770b2567ca546f9a482 # no ppc fsl support
+20a9689b3607456d92c6fb764501f6a95950b098 # no microwatt support
+0c551abfa004ce154d487d91777bf221c808a64f # no ppc fsl support
+9be013b2a9ecb29b5168e4b9db0e48ed53acf37c # no ppc 32bit support
+fd20b60aea6a37788f2f761af405b41c6c34473b # no mgcoge support
+def435c04ee984a5f9ed2711b2bfe946936c6a21 # no ppc fsl support
+456c3005102b18cce6662b1915c6efffe7744dcc # no microwatt support
+016ff72bd2090903715c0f9422a44afbb966f4ee # no ppc 32bit support
+37b9345ce7f4ab17538ea62def6f6d430f091355 # no e500 support
+f5aafbc2af51931668799a9c5080c8e35cbb571f # deprecates libbpf's bpf_program__get_prog_info_linear(), causing the build to fail
diff --git a/config/arm64/default b/config/arm64/default
index d249cab..598ab25 100644
--- a/config/arm64/default
+++ b/config/arm64/default
@@ -2639,7 +2639,6 @@ CONFIG_ZRAM_WRITEBACK=y
# CONFIG_BLK_DEV_COW_COMMON is not set
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_LOOP_MIN_COUNT=8
-CONFIG_BLK_DEV_CRYPTOLOOP=m
# CONFIG_BLK_DEV_DRBD is not set
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_SX8=m
@@ -2759,7 +2758,7 @@
CONFIG_PVPANIC_PCI=m # CONFIG_SCSI_MOD=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI_COMMON=y +CONFIG_SCSI_COMMON=m CONFIG_SCSI=m CONFIG_SCSI_DMA=y CONFIG_SCSI_NETLINK=y @@ -4622,6 +4621,7 @@ CONFIG_TCG_VTPM_PROXY=m CONFIG_TCG_TIS_ST33ZP24=m CONFIG_TCG_TIS_ST33ZP24_I2C=m CONFIG_TCG_TIS_ST33ZP24_SPI=m +# CONFIG_S390_UV_UAPI is not set CONFIG_CRASHER=m CONFIG_XILLYBUS_CLASS=m CONFIG_XILLYBUS=m @@ -10692,7 +10692,6 @@ CONFIG_RAS=y CONFIG_LIBNVDIMM=m CONFIG_BLK_DEV_PMEM=m -CONFIG_ND_BLK=m CONFIG_ND_CLAIM=y CONFIG_ND_BTT=m CONFIG_BTT=y diff --git a/config/arm64/kvmsmall b/config/arm64/kvmsmall index 9ba273c..e0ebafd 100644 --- a/config/arm64/kvmsmall +++ b/config/arm64/kvmsmall @@ -341,7 +341,6 @@ CONFIG_LOCALVERSION="-kvmsmall" # CONFIG_MPLS is not set # CONFIG_MTD is not set # CONFIG_NATIONAL_PHY is not set -CONFIG_ND_BLK=y CONFIG_ND_BTT=y CONFIG_ND_PFN=y # CONFIG_NET_DSA is not set diff --git a/config/armv7hl/default b/config/armv7hl/default index c4b2966..0b44d3d 100644 --- a/config/armv7hl/default +++ b/config/armv7hl/default @@ -1095,6 +1095,7 @@ CONFIG_SYSV68_PARTITION=y # CONFIG_CMDLINE_PARTITION is not set # end of Partition Types +# CONFIG_BLOCK_COMPAT is not set CONFIG_BLK_MQ_PCI=y CONFIG_BLK_MQ_VIRTIO=y CONFIG_BLK_PM=y @@ -2645,7 +2646,6 @@ CONFIG_ZRAM_WRITEBACK=y # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 -CONFIG_BLK_DEV_CRYPTOLOOP=m # CONFIG_BLK_DEV_DRBD is not set CONFIG_BLK_DEV_NBD=m # CONFIG_BLK_DEV_SX8 is not set @@ -2753,7 +2753,7 @@ CONFIG_UACCE=m # CONFIG_SCSI_MOD=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI_COMMON=y +CONFIG_SCSI_COMMON=m CONFIG_SCSI=m CONFIG_SCSI_DMA=y CONFIG_SCSI_NETLINK=y @@ -4431,6 +4431,7 @@ CONFIG_TCG_FTPM_TEE=m CONFIG_TCG_TIS_ST33ZP24=m CONFIG_TCG_TIS_ST33ZP24_I2C=m CONFIG_TCG_TIS_ST33ZP24_SPI=m +# CONFIG_S390_UV_UAPI is not set CONFIG_CRASHER=m CONFIG_XILLYBUS_CLASS=m CONFIG_XILLYBUS=m diff --git a/config/armv7hl/lpae b/config/armv7hl/lpae index 6ad1501..2e4216e 100644 --- a/config/armv7hl/lpae +++ b/config/armv7hl/lpae @@ -149,7 +149,6 @@ CONFIG_MMU_GATHER_TABLE_FREE=y # CONFIG_MVNETA_BM is not set # CONFIG_MV_XOR is not set # CONFIG_MXC6255 is not set -CONFIG_ND_BLK=m CONFIG_ND_BTT=m CONFIG_ND_CLAIM=y # CONFIG_NETDEV_NOTIFIER_ERROR_INJECT is not set diff --git a/config/ppc64le/default b/config/ppc64le/default index 5ea547c..ce4d6b4 100644 --- a/config/ppc64le/default +++ b/config/ppc64le/default @@ -744,6 +744,7 @@ CONFIG_SYSV68_PARTITION=y # CONFIG_CMDLINE_PARTITION is not set # end of Partition Types +# CONFIG_BLOCK_COMPAT is not set CONFIG_BLK_MQ_PCI=y CONFIG_BLK_MQ_VIRTIO=y CONFIG_BLK_MQ_RDMA=y @@ -2062,7 +2063,6 @@ CONFIG_ZRAM_WRITEBACK=y # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 -CONFIG_BLK_DEV_CRYPTOLOOP=m # CONFIG_BLK_DEV_DRBD is not set CONFIG_BLK_DEV_NBD=m # CONFIG_BLK_DEV_SX8 is not set @@ -2180,7 +2180,7 @@ CONFIG_PVPANIC_PCI=m # CONFIG_SCSI_MOD=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI_COMMON=y +CONFIG_SCSI_COMMON=m CONFIG_SCSI=m CONFIG_SCSI_DMA=y CONFIG_SCSI_NETLINK=y @@ -3577,6 +3577,7 @@ CONFIG_TCG_ATMEL=m CONFIG_TCG_IBMVTPM=y CONFIG_TCG_VTPM_PROXY=m # CONFIG_TCG_TIS_ST33ZP24_I2C is not set +# CONFIG_S390_UV_UAPI is not set CONFIG_CRASHER=m CONFIG_XILLYBUS_CLASS=m CONFIG_XILLYBUS=m @@ -5556,7 +5557,6 @@ CONFIG_RAS=y CONFIG_LIBNVDIMM=m CONFIG_BLK_DEV_PMEM=m -CONFIG_ND_BLK=m CONFIG_ND_CLAIM=y CONFIG_ND_BTT=m CONFIG_BTT=y diff --git a/config/s390x/default b/config/s390x/default index 9f78645..2567224 100644 --- a/config/s390x/default +++ b/config/s390x/default @@ 
-1634,7 +1634,6 @@ CONFIG_ZRAM_WRITEBACK=y # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 -CONFIG_BLK_DEV_CRYPTOLOOP=m # CONFIG_BLK_DEV_DRBD is not set CONFIG_BLK_DEV_NBD=m # CONFIG_BLK_DEV_SX8 is not set @@ -1649,7 +1648,6 @@ CONFIG_ATA_OVER_ETH=m # # S/390 block device drivers # -CONFIG_BLK_DEV_XPRAM=m CONFIG_DCSSBLK=m CONFIG_DASD=m CONFIG_DASD_PROFILE=y @@ -1752,7 +1750,7 @@ CONFIG_PVPANIC_PCI=m # CONFIG_SCSI_MOD=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI_COMMON=y +CONFIG_SCSI_COMMON=m CONFIG_SCSI=m CONFIG_SCSI_DMA=y CONFIG_SCSI_NETLINK=y @@ -2355,6 +2353,7 @@ CONFIG_SCLP_VT220_TTY=y CONFIG_SCLP_VT220_CONSOLE=y CONFIG_HMC_DRV=m CONFIG_SCLP_OFB=y +CONFIG_S390_UV_UAPI=y CONFIG_S390_TAPE=m # diff --git a/config/s390x/zfcpdump b/config/s390x/zfcpdump index 2cde91c..925316b 100644 --- a/config/s390x/zfcpdump +++ b/config/s390x/zfcpdump @@ -479,6 +479,7 @@ CONFIG_EFI_PARTITION=y # end of Partition Types CONFIG_BLOCK_COMPAT=y +# CONFIG_BLK_MQ_PCI is not set CONFIG_BLK_MQ_VIRTIO=y # CONFIG_BLK_PM is not set CONFIG_BLOCK_HOLDER_DEPRECATED=y @@ -915,6 +916,7 @@ CONFIG_SCLP_VT220_TTY=y CONFIG_SCLP_VT220_CONSOLE=y # CONFIG_HMC_DRV is not set # CONFIG_SCLP_OFB is not set +# CONFIG_S390_UV_UAPI is not set # CONFIG_S390_TAPE is not set CONFIG_VMLOGRDR=y # CONFIG_VMCP is not set diff --git a/config/x86_64/default b/config/x86_64/default index 53a1519..38616a0 100644 --- a/config/x86_64/default +++ b/config/x86_64/default @@ -2562,7 +2562,6 @@ CONFIG_ZRAM_WRITEBACK=y # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 -CONFIG_BLK_DEV_CRYPTOLOOP=m # CONFIG_BLK_DEV_DRBD is not set CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_SX8=m @@ -2691,7 +2690,7 @@ CONFIG_PVPANIC_PCI=m # CONFIG_SCSI_MOD=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI_COMMON=y +CONFIG_SCSI_COMMON=m CONFIG_SCSI=m CONFIG_SCSI_DMA=y CONFIG_SCSI_NETLINK=y @@ -4404,6 +4403,7 @@ CONFIG_TCG_VTPM_PROXY=m # CONFIG_TCG_TIS_ST33ZP24_I2C is not set # CONFIG_TCG_TIS_ST33ZP24_SPI is not set CONFIG_TELCLOCK=m +# CONFIG_S390_UV_UAPI is not set CONFIG_CRASHER=m CONFIG_XILLYBUS_CLASS=m CONFIG_XILLYBUS=m @@ -9197,7 +9197,6 @@ CONFIG_USB4=y CONFIG_LIBNVDIMM=m CONFIG_BLK_DEV_PMEM=m -CONFIG_ND_BLK=m CONFIG_ND_CLAIM=y CONFIG_ND_BTT=m CONFIG_BTT=y diff --git a/config/x86_64/kvmsmall b/config/x86_64/kvmsmall index 5735499..e91e541 100644 --- a/config/x86_64/kvmsmall +++ b/config/x86_64/kvmsmall @@ -430,7 +430,6 @@ CONFIG_LZ4_DECOMPRESS=m # CONFIG_MTD is not set # CONFIG_MWAVE is not set # CONFIG_NATIONAL_PHY is not set -CONFIG_ND_BLK=y CONFIG_ND_BTT=y CONFIG_ND_PFN=y # CONFIG_NET_DROP_MONITOR is not set diff --git a/patches.suse/0006-nvdimm-blk-Delete-the-block-aperture-window-driver.patch b/patches.suse/0006-nvdimm-blk-Delete-the-block-aperture-window-driver.patch index e0673fd..45e2873 100644 --- a/patches.suse/0006-nvdimm-blk-Delete-the-block-aperture-window-driver.patch +++ b/patches.suse/0006-nvdimm-blk-Delete-the-block-aperture-window-driver.patch @@ -663,7 +663,7 @@ Acked-by: Coly Li libnvdimm-y := core.o --- a/drivers/nvdimm/blk.c +++ /dev/null -@@ -1,327 +0,0 @@ +@@ -1,326 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * NVDIMM Block Window Driver @@ -828,7 +828,7 @@ Acked-by: Coly Li - return err; -} - --static blk_qc_t nd_blk_submit_bio(struct bio *bio) +-static void nd_blk_submit_bio(struct bio *bio) -{ - struct bio_integrity_payload *bip; - struct nd_namespace_blk *nsblk = bio->bi_bdev->bd_disk->private_data; @@ -839,7 +839,7 @@ Acked-by: Coly Li - bool do_acct; - - if 
(!bio_integrity_prep(bio)) -- return BLK_QC_T_NONE; +- return; - - bip = bio_integrity(bio); - rw = bio_data_dir(bio); @@ -865,7 +865,6 @@ Acked-by: Coly Li - bio_end_io_acct(bio, start); - - bio_endio(bio); -- return BLK_QC_T_NONE; -} - -static int nsblk_rw_bytes(struct nd_namespace_common *ndns, diff --git a/patches.suse/0013-blk-mq-Properly-init-requests-from-blk_mq_alloc_requ.patch b/patches.suse/0013-blk-mq-Properly-init-requests-from-blk_mq_alloc_requ.patch index 86958bd..dd81862 100644 --- a/patches.suse/0013-blk-mq-Properly-init-requests-from-blk_mq_alloc_requ.patch +++ b/patches.suse/0013-blk-mq-Properly-init-requests-from-blk_mq_alloc_requ.patch @@ -11,8 +11,6 @@ Function blk_mq_alloc_request_hctx() is missing zeroing/init of rq->bio, biotail, __sector, and __data_len members, which blk_mq_alloc_request() has, so duplicate what we do in blk_mq_alloc_request(). -(Coly Li: rebased for Linux v5.14 based SUSE kernel) - Fixes: 1f5bd336b9150 ("blk-mq: add blk_mq_alloc_request_hctx") Signed-off-by: John Garry Reviewed-by: Christoph Hellwig @@ -20,14 +18,15 @@ Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/1666780513-121650-1-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe Signed-off-by: Coly Li - --- - block/blk-mq.c | 7 ++++++- + block/blk-mq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 33292c01875d..75c8296b6feb 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c -@@ -520,6 +520,7 @@ struct request *blk_mq_alloc_request_hct +@@ -611,6 +611,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, .nr_tags = 1, }; u64 alloc_time_ns = 0; @@ -35,12 +34,13 @@ Signed-off-by: Coly Li unsigned int cpu; unsigned int tag; int ret; -@@ -564,7 +565,11 @@ struct request *blk_mq_alloc_request_hct +@@ -660,8 +661,12 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, tag = blk_mq_get_tag(&data); if (tag == BLK_MQ_NO_TAG) goto out_queue_exit; -- return blk_mq_rq_ctx_init(&data, tag, alloc_time_ns); -+ rq = blk_mq_rq_ctx_init(&data, tag, alloc_time_ns); +- return blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag, ++ rq = blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag, + alloc_time_ns); + rq->__data_len = 0; + rq->__sector = (sector_t) -1; + rq->bio = rq->biotail = NULL; @@ -48,3 +48,6 @@ Signed-off-by: Coly Li out_queue_exit: blk_queue_exit(q); +-- +2.35.3 + diff --git a/patches.suse/ALSA-memalloc-Don-t-fall-back-for-SG-buffer-with-IOM.patch b/patches.suse/ALSA-memalloc-Don-t-fall-back-for-SG-buffer-with-IOM.patch new file mode 100644 index 0000000..eddbeb2 --- /dev/null +++ b/patches.suse/ALSA-memalloc-Don-t-fall-back-for-SG-buffer-with-IOM.patch @@ -0,0 +1,95 @@ +From 9736a325137b62499d2b4be3fc2d742b131f75da Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Thu, 10 Nov 2022 14:22:16 +0100 +Subject: [PATCH] ALSA: memalloc: Don't fall back for SG-buffer with IOMMU +Git-commit: 9736a325137b62499d2b4be3fc2d742b131f75da +Patch-mainline: v6.1-rc5 +References: git-fixes + +When the non-contiguous page allocation for SG buffer allocation +fails, the memalloc helper tries to fall back to the old page +allocation methods. This would, however, result in the bogus page +addresses when IOMMU is enabled. Usually in such a case, the fallback +allocation should fail as well, but occasionally it succeeds and +hitting a bad access. 
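The addresses go wrong because, behind an IOMMU, the address a device must use is an IOVA established by the IOMMU mapping, not the page's physical address. A two-line illustration of the mismatch (generic sketch, not code from this driver):

	dma_addr_t iova = sg_dma_address(sgt->sgl);	/* device-usable: IOVA programmed into the IOMMU */
	dma_addr_t phys = page_to_phys(page);		/* CPU physical address: bogus as a device address here */
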
+ +The fallback was thought for non-IOMMU case, and as the error from +dma_alloc_noncontiguous() with IOMMU essentially implies a fatal +memory allocation error, we should return the error straightforwardly +without fallback. This avoids the corner case like the above. + +The patch also renames the local variable "dma_ops" with snd_ prefix +for avoiding the name conflict. + +Fixes: a8d302a0b770 ("ALSA: memalloc: Revive x86-specific WC page allocations again") +Reported-by: Kai Vehmanen +Reviewed-by: Kai Vehmanen +Link: https://lore.kernel.org/r/alpine.DEB.2.22.394.2211041541090.3532114@eliteleevi.tm.intel.com +Link: https://lore.kernel.org/r/20221110132216.30605-1-tiwai@suse.de +Signed-off-by: Takashi Iwai + +--- + sound/core/memalloc.c | 20 +++++++++++--------- + 1 file changed, 11 insertions(+), 9 deletions(-) + +diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c +index 03cffe771366..6a81aaab25ab 100644 +--- a/sound/core/memalloc.c ++++ b/sound/core/memalloc.c +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -541,19 +542,20 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size) + struct sg_table *sgt; + void *p; + +- sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir, +- DEFAULT_GFP, 0); +- if (!sgt) { + #ifdef CONFIG_SND_DMA_SGBUF ++ if (!get_dma_ops(dmab->dev.dev)) { + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG) + dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; + else + dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK; + return snd_dma_sg_fallback_alloc(dmab, size); +-#else +- return NULL; +-#endif + } ++#endif ++ ++ sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir, ++ DEFAULT_GFP, 0); ++ if (!sgt) ++ return NULL; + + dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, + sg_dma_address(sgt->sgl)); +@@ -857,7 +859,7 @@ static const struct snd_malloc_ops snd_dma_noncoherent_ops = { + /* + * Entry points + */ +-static const struct snd_malloc_ops *dma_ops[] = { ++static const struct snd_malloc_ops *snd_dma_ops[] = { + [SNDRV_DMA_TYPE_CONTINUOUS] = &snd_dma_continuous_ops, + [SNDRV_DMA_TYPE_VMALLOC] = &snd_dma_vmalloc_ops, + #ifdef CONFIG_HAS_DMA +@@ -883,7 +885,7 @@ static const struct snd_malloc_ops *snd_dma_get_ops(struct snd_dma_buffer *dmab) + if (WARN_ON_ONCE(!dmab)) + return NULL; + if (WARN_ON_ONCE(dmab->dev.type <= SNDRV_DMA_TYPE_UNKNOWN || +- dmab->dev.type >= ARRAY_SIZE(dma_ops))) ++ dmab->dev.type >= ARRAY_SIZE(snd_dma_ops))) + return NULL; +- return dma_ops[dmab->dev.type]; ++ return snd_dma_ops[dmab->dev.type]; + } +-- +2.35.3 + diff --git a/patches.suse/ALSA-memalloc-Try-dma_alloc_noncontiguous-at-first.patch b/patches.suse/ALSA-memalloc-Try-dma_alloc_noncontiguous-at-first.patch new file mode 100644 index 0000000..1339565 --- /dev/null +++ b/patches.suse/ALSA-memalloc-Try-dma_alloc_noncontiguous-at-first.patch @@ -0,0 +1,60 @@ +From 9d8e536d36e75e76614fe09ffab9a1df95b8b666 Mon Sep 17 00:00:00 2001 +From: Takashi Iwai +Date: Sat, 12 Nov 2022 09:47:18 +0100 +Subject: [PATCH] ALSA: memalloc: Try dma_alloc_noncontiguous() at first +Git-commit: 9d8e536d36e75e76614fe09ffab9a1df95b8b666 +Patch-mainline: v6.1-rc5 +References: git-fixes + +The latest fix for the non-contiguous memalloc helper changed the +allocation method for a non-IOMMU system to use only the fallback +allocator. This should have worked, but it caused a problem sometimes +when too many non-contiguous pages are allocated that can't be treated +by HD-audio controller. 
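The strategy that the next paragraph restores, condensed into an illustrative fragment (assumes the snd_dma_noncontig_alloc() context shown in the hunks below; the dmab->dev.type bookkeeping is elided):

	sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir,
				      DEFAULT_GFP, 0);
#ifdef CONFIG_SND_DMA_SGBUF
	if (!sgt && !get_dma_ops(dmab->dev.dev))	/* failed, and no IOMMU */
		return snd_dma_sg_fallback_alloc(dmab, size);
#endif
	if (!sgt)
		return NULL;				/* IOMMU case: failure stays fatal */
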
+ +As a quirk workaround, go back to the original strategy: use +dma_alloc_noncontiguous() at first, and apply the fallback only when +it fails, but only for non-IOMMU case. + +We'll need a better fix in the fallback code as well, but this +workaround should paper over most cases. + +Fixes: 9736a325137b ("ALSA: memalloc: Don't fall back for SG-buffer with IOMMU") +Reported-by: Linus Torvalds +Link: https://lore.kernel.org/r/CAHk-=wgSH5ubdvt76gNwa004ooZAEJL_1Q-Fyw5M2FDdqL==dg@mail.gmail.com +Link: https://lore.kernel.org/r/20221112084718.3305-1-tiwai@suse.de +Signed-off-by: Takashi Iwai + +--- + sound/core/memalloc.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c +index 6a81aaab25ab..ba095558b6d1 100644 +--- a/sound/core/memalloc.c ++++ b/sound/core/memalloc.c +@@ -542,8 +542,10 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size) + struct sg_table *sgt; + void *p; + ++ sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir, ++ DEFAULT_GFP, 0); + #ifdef CONFIG_SND_DMA_SGBUF +- if (!get_dma_ops(dmab->dev.dev)) { ++ if (!sgt && !get_dma_ops(dmab->dev.dev)) { + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG) + dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; + else +@@ -551,9 +553,6 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size) + return snd_dma_sg_fallback_alloc(dmab, size); + } + #endif +- +- sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir, +- DEFAULT_GFP, 0); + if (!sgt) + return NULL; + +-- +2.35.3 + diff --git a/patches.suse/ASoC-SOF-topology-No-need-to-assign-core-ID-if-token.patch b/patches.suse/ASoC-SOF-topology-No-need-to-assign-core-ID-if-token.patch new file mode 100644 index 0000000..5f19445 --- /dev/null +++ b/patches.suse/ASoC-SOF-topology-No-need-to-assign-core-ID-if-token.patch @@ -0,0 +1,66 @@ +From 3d59eaef49ca2db581156a7b77c9afc0546eefc0 Mon Sep 17 00:00:00 2001 +From: Peter Ujfalusi +Date: Mon, 7 Nov 2022 11:04:33 +0200 +Subject: [PATCH] ASoC: SOF: topology: No need to assign core ID if token parsing failed +Git-commit: 3d59eaef49ca2db581156a7b77c9afc0546eefc0 +Patch-mainline: v6.1-rc6 +References: git-fixes + +Move the return value check before attempting to assign the core ID to the +swidget since we are going to fail the sof_widget_ready() and free up +swidget anyways. 
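The reordering is the usual validate-before-use pattern: the token-parsing status is checked first, because the error path releases swidget, so any core ID written before the check would be a dead store. Simplified sketch (the SOF_DBG_DISABLE_MULTICORE special case and the exact error reporting are as in the hunk below):

	if (ret < 0) {		/* token parsing failed: bail out before using the results */
		kfree(swidget);	/* swidget is freed on this path */
		return ret;
	}
	core = sof_get_token_value(SOF_TKN_COMP_CORE_ID, swidget->tuples,
				   swidget->num_tuples);
	if (core >= 0)
		swidget->core = core;	/* derived only from a known-good parse */
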
+ +Fixes: 909dadf21aae ("ASoC: SOF: topology: Make DAI widget parsing IPC agnostic") + +Signed-off-by: Peter Ujfalusi +Reviewed-by: Pierre-Louis Bossart +Reviewed-by: Ranjani Sridharan +Link: https://lore.kernel.org/r/20221107090433.5146-1-peter.ujfalusi@linux.intel.com +Signed-off-by: Mark Brown +Acked-by: Takashi Iwai + +--- + sound/soc/sof/topology.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c +index 38855dd60617..6a0e7f3b5023 100644 +--- a/sound/soc/sof/topology.c ++++ b/sound/soc/sof/topology.c +@@ -1344,16 +1344,6 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index, + break; + } + +- if (sof_debug_check_flag(SOF_DBG_DISABLE_MULTICORE)) { +- swidget->core = SOF_DSP_PRIMARY_CORE; +- } else { +- int core = sof_get_token_value(SOF_TKN_COMP_CORE_ID, swidget->tuples, +- swidget->num_tuples); +- +- if (core >= 0) +- swidget->core = core; +- } +- + /* check token parsing reply */ + if (ret < 0) { + dev_err(scomp->dev, +@@ -1365,6 +1355,16 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index, + return ret; + } + ++ if (sof_debug_check_flag(SOF_DBG_DISABLE_MULTICORE)) { ++ swidget->core = SOF_DSP_PRIMARY_CORE; ++ } else { ++ int core = sof_get_token_value(SOF_TKN_COMP_CORE_ID, swidget->tuples, ++ swidget->num_tuples); ++ ++ if (core >= 0) ++ swidget->core = core; ++ } ++ + /* bind widget to external event */ + if (tw->event_type) { + if (widget_ops[w->id].bind_event) { +-- +2.35.3 + diff --git a/patches.suse/KVM-s390-Add-capability-for-storage-key-extension-of-MEM_OP-IOCTL b/patches.suse/KVM-s390-Add-capability-for-storage-key-extension-of-MEM_OP-IOCTL new file mode 100644 index 0000000..c136add --- /dev/null +++ b/patches.suse/KVM-s390-Add-capability-for-storage-key-extension-of-MEM_OP-IOCTL @@ -0,0 +1,44 @@ +From: Janis Schoetterl-Glausch +Date: Fri, 11 Feb 2022 19:22:13 +0100 +Subject: KVM: s390: Add capability for storage key extension of MEM_OP IOCTL +Git-commit: d004079edc166ff19605475211305923c708b4d5 +Patch-mainline: v5.18-rc1 +References: jsc#PED-579 + +Availability of the KVM_CAP_S390_MEM_OP_EXTENSION capability signals that: +* The vcpu MEM_OP IOCTL supports storage key checking. +* The vm MEM_OP IOCTL exists. + +Signed-off-by: Janis Schoetterl-Glausch +Reviewed-by: Janosch Frank +Reviewed-by: Christian Borntraeger +Link: https://lore.kernel.org/r/20220211182215.2730017-9-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +[ ptesarik: As requested by IBM, the capability number has been adjusted + to match upstream commit 4dfc4ec2b7f5a3a27d166ac42cf8a583fa2d3284. 
] +Acked-by: Petr Tesarik +--- + arch/s390/kvm/kvm-s390.c | 1 + + include/uapi/linux/kvm.h | 1 + + 2 files changed, 2 insertions(+) + +--- a/arch/s390/kvm/kvm-s390.c ++++ b/arch/s390/kvm/kvm-s390.c +@@ -568,6 +568,7 @@ int kvm_vm_ioctl_check_extension(struct + case KVM_CAP_S390_VCPU_RESETS: + case KVM_CAP_SET_GUEST_DEBUG: + case KVM_CAP_S390_DIAG318: ++ case KVM_CAP_S390_MEM_OP_EXTENSION: + r = 1; + break; + case KVM_CAP_SET_GUEST_DEBUG2: +--- a/include/uapi/linux/kvm.h ++++ b/include/uapi/linux/kvm.h +@@ -1119,6 +1119,7 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 + #define KVM_CAP_ARM_MTE 205 + #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 ++#define KVM_CAP_S390_MEM_OP_EXTENSION 211 + + #ifdef KVM_CAP_IRQ_ROUTING + diff --git a/patches.suse/KVM-s390-Add-missing-vm-MEM_OP-size-check b/patches.suse/KVM-s390-Add-missing-vm-MEM_OP-size-check new file mode 100644 index 0000000..008100a --- /dev/null +++ b/patches.suse/KVM-s390-Add-missing-vm-MEM_OP-size-check @@ -0,0 +1,62 @@ +From: Janis Schoetterl-Glausch +Date: Mon, 21 Feb 2022 17:32:37 +0100 +Subject: KVM: s390: Add missing vm MEM_OP size check +Git-commit: 3d9042f8b923810c169ece02d91c70ec498eff0b +Patch-mainline: v5.18-rc1 +References: jsc#PED-579 + +Check that size is not zero, preventing the following warning: + +WARNING: CPU: 0 PID: 9692 at mm/vmalloc.c:3059 __vmalloc_node_range+0x528/0x648 +Modules linked in: +CPU: 0 PID: 9692 Comm: memop Not tainted 5.17.0-rc3-e4+ #80 +Hardware name: IBM 8561 T01 701 (LPAR) +Krnl PSW : 0704c00180000000 0000000082dc584c (__vmalloc_node_range+0x52c/0x648) + R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 +Krnl GPRS: 0000000000000083 ffffffffffffffff 0000000000000000 0000000000000001 + 0000038000000000 000003ff80000000 0000000000000cc0 000000008ebb8000 + 0000000087a8a700 000000004040aeb1 000003ffd9f7dec8 000000008ebb8000 + 000000009d9b8000 000000000102a1b4 00000380035afb68 00000380035afaa8 +Krnl Code: 0000000082dc583e: d028a7f4ff80 trtr 2036(41,%r10),3968(%r15) + 0000000082dc5844: af000000 mc 0,0 + #0000000082dc5848: af000000 mc 0,0 + >0000000082dc584c: a7d90000 lghi %r13,0 + 0000000082dc5850: b904002d lgr %r2,%r13 + 0000000082dc5854: eb6ff1080004 lmg %r6,%r15,264(%r15) + 0000000082dc585a: 07fe bcr 15,%r14 + 0000000082dc585c: 47000700 bc 0,1792 +Call Trace: + [<0000000082dc584c>] __vmalloc_node_range+0x52c/0x648 + [<0000000082dc5b62>] vmalloc+0x5a/0x68 + [<000003ff8067f4ca>] kvm_arch_vm_ioctl+0x2da/0x2a30 [kvm] + [<000003ff806705bc>] kvm_vm_ioctl+0x4ec/0x978 [kvm] + [<0000000082e562fe>] __s390x_sys_ioctl+0xbe/0x100 + [<000000008360a9bc>] __do_syscall+0x1d4/0x200 + [<0000000083618bd2>] system_call+0x82/0xb0 +Last Breaking-Event-Address: + [<0000000082dc5348>] __vmalloc_node_range+0x28/0x648 + +Other than the warning, there is no ill effect from the missing check, +the condition is detected by subsequent code and causes a return +with ENOMEM. 
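The guard itself is routine argument validation: a zero size has to be rejected before it can reach vmalloc(), whose allocation path emits the warning shown above. Condensed, illustrative ordering (the actual change is the one-line test in the hunk below):

	if (mop->flags & ~supported_flags || !mop->size)
		return -EINVAL;			/* !mop->size: vmalloc(0) would WARN as above */
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;
	/* ... remaining checks ... */
	tmpbuf = vmalloc(mop->size);		/* reached only with 0 < size <= MEM_OP_MAX_SIZE */
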
+ +Fixes: ef11c9463ae0 (KVM: s390: Add vm IOCTL for key checked guest absolute memory access) +Signed-off-by: Janis Schoetterl-Glausch +Link: https://lore.kernel.org/r/20220221163237.4122868-1-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +Acked-by: Petr Tesarik +--- + arch/s390/kvm/kvm-s390.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/s390/kvm/kvm-s390.c ++++ b/arch/s390/kvm/kvm-s390.c +@@ -2406,7 +2406,7 @@ static int kvm_s390_vm_mem_op(struct kvm + + supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION + | KVM_S390_MEMOP_F_CHECK_ONLY; +- if (mop->flags & ~supported_flags) ++ if (mop->flags & ~supported_flags || !mop->size) + return -EINVAL; + if (mop->size > MEM_OP_MAX_SIZE) + return -E2BIG; diff --git a/patches.suse/KVM-s390-Add-optional-storage-key-checking-to-MEMOP-IOCTL b/patches.suse/KVM-s390-Add-optional-storage-key-checking-to-MEMOP-IOCTL new file mode 100644 index 0000000..f1734c2 --- /dev/null +++ b/patches.suse/KVM-s390-Add-optional-storage-key-checking-to-MEMOP-IOCTL @@ -0,0 +1,125 @@ +From: Janis Schoetterl-Glausch +Date: Fri, 11 Feb 2022 19:22:10 +0100 +Subject: KVM: s390: Add optional storage key checking to MEMOP IOCTL +Git-commit: e9e9feebcbc14b174fef862842f8cc9a388e1db3 +Patch-mainline: v5.18-rc1 +References: jsc#PED-579 + +User space needs a mechanism to perform key checked accesses when +emulating instructions. + +The key can be passed as an additional argument. +Having an additional argument is flexible, as user space can +pass the guest PSW's key, in order to make an access the same way the +CPU would, or pass another key if necessary. + +Signed-off-by: Janis Schoetterl-Glausch +Reviewed-by: Claudio Imbrenda +Reviewed-by: Christian Borntraeger +Reviewed-by: Janosch Frank +Link: https://lore.kernel.org/r/20220211182215.2730017-6-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +Acked-by: Petr Tesarik +--- + arch/s390/kvm/kvm-s390.c | 31 +++++++++++++++++++++---------- + include/uapi/linux/kvm.h | 6 +++++- + 2 files changed, 26 insertions(+), 11 deletions(-) + +--- a/arch/s390/kvm/kvm-s390.c ++++ b/arch/s390/kvm/kvm-s390.c +@@ -2391,6 +2391,11 @@ static int kvm_s390_handle_pv(struct kvm + return r; + } + ++static bool access_key_invalid(u8 access_key) ++{ ++ return access_key > 0xf; ++} ++ + long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) + { +@@ -4713,17 +4718,21 @@ static long kvm_s390_guest_mem_op(struct + void *tmpbuf = NULL; + int r = 0; + const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION +- | KVM_S390_MEMOP_F_CHECK_ONLY; ++ | KVM_S390_MEMOP_F_CHECK_ONLY ++ | KVM_S390_MEMOP_F_SKEY_PROTECTION; + + if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) + return -EINVAL; +- + if (mop->size > MEM_OP_MAX_SIZE) + return -E2BIG; +- + if (kvm_s390_pv_cpu_is_protected(vcpu)) + return -EINVAL; +- ++ if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { ++ if (access_key_invalid(mop->key)) ++ return -EINVAL; ++ } else { ++ mop->key = 0; ++ } + if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { + tmpbuf = vmalloc(mop->size); + if (!tmpbuf) +@@ -4733,11 +4742,12 @@ static long kvm_s390_guest_mem_op(struct + switch (mop->op) { + case KVM_S390_MEMOP_LOGICAL_READ: + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { +- r = check_gva_range(vcpu, mop->gaddr, mop->ar, +- mop->size, GACC_FETCH, 0); ++ r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, ++ GACC_FETCH, mop->key); + break; + } +- r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); ++ r = 
read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, ++ mop->size, mop->key); + if (r == 0) { + if (copy_to_user(uaddr, tmpbuf, mop->size)) + r = -EFAULT; +@@ -4745,15 +4755,16 @@ static long kvm_s390_guest_mem_op(struct + break; + case KVM_S390_MEMOP_LOGICAL_WRITE: + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { +- r = check_gva_range(vcpu, mop->gaddr, mop->ar, +- mop->size, GACC_STORE, 0); ++ r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, ++ GACC_STORE, mop->key); + break; + } + if (copy_from_user(tmpbuf, uaddr, mop->size)) { + r = -EFAULT; + break; + } +- r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); ++ r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, ++ mop->size, mop->key); + break; + } + +--- a/include/uapi/linux/kvm.h ++++ b/include/uapi/linux/kvm.h +@@ -544,7 +544,10 @@ struct kvm_s390_mem_op { + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + union { +- __u8 ar; /* the access register number */ ++ struct { ++ __u8 ar; /* the access register number */ ++ __u8 key; /* access key, ignored if flag unset */ ++ }; + __u32 sida_offset; /* offset into the sida */ + __u8 reserved[32]; /* should be set to 0 */ + }; +@@ -557,6 +560,7 @@ struct kvm_s390_mem_op { + /* flags for kvm_s390_mem_op->flags */ + #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) + #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) ++#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) + + /* for KVM_INTERRUPT */ + struct kvm_interrupt { diff --git a/patches.suse/KVM-s390-Add-vm-IOCTL-for-key-checked-guest-absolute-memory-access b/patches.suse/KVM-s390-Add-vm-IOCTL-for-key-checked-guest-absolute-memory-access new file mode 100644 index 0000000..339781f --- /dev/null +++ b/patches.suse/KVM-s390-Add-vm-IOCTL-for-key-checked-guest-absolute-memory-access @@ -0,0 +1,249 @@ +From: Janis Schoetterl-Glausch +Date: Fri, 11 Feb 2022 19:22:11 +0100 +Subject: KVM: s390: Add vm IOCTL for key checked guest absolute memory access +Git-commit: ef11c9463ae006302ce170a401854a48ea0532ca +Patch-mainline: v5.18-rc1 +References: jsc#PED-579 + +Channel I/O honors storage keys and is performed on absolute memory. +For I/O emulation user space therefore needs to be able to do key +checked accesses. +The vm IOCTL supports read/write accesses, as well as checking +if an access would succeed. +Unlike relying on KVM_S390_GET_SKEYS for key checking would, +the vm IOCTL performs the check in lockstep with the read or write, +by, ultimately, mapping the access to move instructions that +support key protection checking with a supplied key. +Fetch and storage protection override are not applicable to absolute +accesses and so are not applied as they are when using the vcpu memop. 
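From user space the new ioctl is driven like the vcpu memop, but on the VM file descriptor and with guest absolute addresses. A minimal illustrative caller (vm_fd, gpa, len, buf and access_key are placeholders; per the API documentation a positive return value is the program-interruption code, a negative one the usual errno-style error):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	struct kvm_s390_mem_op mop = {
		.gaddr = gpa,				/* guest absolute address */
		.size  = len,
		.op    = KVM_S390_MEMOP_ABSOLUTE_READ,
		.buf   = (__u64)(unsigned long)buf,	/* user-space destination */
		.flags = KVM_S390_MEMOP_F_SKEY_PROTECTION,
		.key   = access_key,			/* access key, valid range 0..15 */
	};
	int rc = ioctl(vm_fd, KVM_S390_MEM_OP, &mop);
	/* rc == 0: success; rc > 0: PGM code, e.g. PGM_PROTECTION; rc < 0: -errno */
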
+ +Signed-off-by: Janis Schoetterl-Glausch +Reviewed-by: Christian Borntraeger +Link: https://lore.kernel.org/r/20220211182215.2730017-7-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +Acked-by: Petr Tesarik +--- + arch/s390/kvm/gaccess.c | 72 +++++++++++++++++++++++++++++++++++++++++ + arch/s390/kvm/gaccess.h | 6 +++ + arch/s390/kvm/kvm-s390.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++ + include/uapi/linux/kvm.h | 2 + + 4 files changed, 161 insertions(+) + +--- a/arch/s390/kvm/gaccess.c ++++ b/arch/s390/kvm/gaccess.c +@@ -795,6 +795,35 @@ static int low_address_protection_enable + return 1; + } + ++static int vm_check_access_key(struct kvm *kvm, u8 access_key, ++ enum gacc_mode mode, gpa_t gpa) ++{ ++ u8 storage_key, access_control; ++ bool fetch_protected; ++ unsigned long hva; ++ int r; ++ ++ if (access_key == 0) ++ return 0; ++ ++ hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); ++ if (kvm_is_error_hva(hva)) ++ return PGM_ADDRESSING; ++ ++ mmap_read_lock(current->mm); ++ r = get_guest_storage_key(current->mm, hva, &storage_key); ++ mmap_read_unlock(current->mm); ++ if (r) ++ return r; ++ access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key); ++ if (access_control == access_key) ++ return 0; ++ fetch_protected = storage_key & _PAGE_FP_BIT; ++ if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected) ++ return 0; ++ return PGM_PROTECTION; ++} ++ + static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode, + union asce asce) + { +@@ -994,6 +1023,26 @@ access_guest_page_with_key(struct kvm *k + return 0; + } + ++int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data, ++ unsigned long len, enum gacc_mode mode, u8 access_key) ++{ ++ int offset = offset_in_page(gpa); ++ int fragment_len; ++ int rc; ++ ++ while (min(PAGE_SIZE - offset, len) > 0) { ++ fragment_len = min(PAGE_SIZE - offset, len); ++ rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, access_key); ++ if (rc) ++ return rc; ++ offset = 0; ++ len -= fragment_len; ++ data += fragment_len; ++ gpa += fragment_len; ++ } ++ return 0; ++} ++ + int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, + void *data, unsigned long len, enum gacc_mode mode, + u8 access_key) +@@ -1144,6 +1193,29 @@ int check_gva_range(struct kvm_vcpu *vcp + return rc; + } + ++/** ++ * check_gpa_range - test a range of guest physical addresses for accessibility ++ * @kvm: virtual machine instance ++ * @gpa: guest physical address ++ * @length: length of test range ++ * @mode: access mode to test, relevant for storage keys ++ * @access_key: access key to mach the storage keys with ++ */ ++int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length, ++ enum gacc_mode mode, u8 access_key) ++{ ++ unsigned int fragment_len; ++ int rc = 0; ++ ++ while (length && !rc) { ++ fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length); ++ rc = vm_check_access_key(kvm, access_key, mode, gpa); ++ length -= fragment_len; ++ gpa += fragment_len; ++ } ++ return rc; ++} ++ + /** + * kvm_s390_check_low_addr_prot_real - check for low-address protection + * @vcpu: virtual cpu +--- a/arch/s390/kvm/gaccess.h ++++ b/arch/s390/kvm/gaccess.h +@@ -193,6 +193,12 @@ int guest_translate_address_with_key(str + int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, + unsigned long length, enum gacc_mode mode, u8 access_key); + ++int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length, ++ enum gacc_mode mode, u8 access_key); ++ ++int 
access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data, ++ unsigned long len, enum gacc_mode mode, u8 access_key); ++ + int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, + void *data, unsigned long len, enum gacc_mode mode, + u8 access_key); +--- a/arch/s390/kvm/kvm-s390.c ++++ b/arch/s390/kvm/kvm-s390.c +@@ -2396,6 +2396,78 @@ static bool access_key_invalid(u8 access + return access_key > 0xf; + } + ++static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) ++{ ++ void __user *uaddr = (void __user *)mop->buf; ++ u64 supported_flags; ++ void *tmpbuf = NULL; ++ int r, srcu_idx; ++ ++ supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION ++ | KVM_S390_MEMOP_F_CHECK_ONLY; ++ if (mop->flags & ~supported_flags) ++ return -EINVAL; ++ if (mop->size > MEM_OP_MAX_SIZE) ++ return -E2BIG; ++ if (kvm_s390_pv_is_protected(kvm)) ++ return -EINVAL; ++ if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { ++ if (access_key_invalid(mop->key)) ++ return -EINVAL; ++ } else { ++ mop->key = 0; ++ } ++ if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { ++ tmpbuf = vmalloc(mop->size); ++ if (!tmpbuf) ++ return -ENOMEM; ++ } ++ ++ srcu_idx = srcu_read_lock(&kvm->srcu); ++ ++ if (kvm_is_error_gpa(kvm, mop->gaddr)) { ++ r = PGM_ADDRESSING; ++ goto out_unlock; ++ } ++ ++ switch (mop->op) { ++ case KVM_S390_MEMOP_ABSOLUTE_READ: { ++ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { ++ r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key); ++ } else { ++ r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, ++ mop->size, GACC_FETCH, mop->key); ++ if (r == 0) { ++ if (copy_to_user(uaddr, tmpbuf, mop->size)) ++ r = -EFAULT; ++ } ++ } ++ break; ++ } ++ case KVM_S390_MEMOP_ABSOLUTE_WRITE: { ++ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { ++ r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); ++ } else { ++ if (copy_from_user(tmpbuf, uaddr, mop->size)) { ++ r = -EFAULT; ++ break; ++ } ++ r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, ++ mop->size, GACC_STORE, mop->key); ++ } ++ break; ++ } ++ default: ++ r = -EINVAL; ++ } ++ ++out_unlock: ++ srcu_read_unlock(&kvm->srcu, srcu_idx); ++ ++ vfree(tmpbuf); ++ return r; ++} ++ + long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) + { +@@ -2520,6 +2592,15 @@ long kvm_arch_vm_ioctl(struct file *filp + } + break; + } ++ case KVM_S390_MEM_OP: { ++ struct kvm_s390_mem_op mem_op; ++ ++ if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) ++ r = kvm_s390_vm_mem_op(kvm, &mem_op); ++ else ++ r = -EFAULT; ++ break; ++ } + default: + r = -ENOTTY; + } +--- a/include/uapi/linux/kvm.h ++++ b/include/uapi/linux/kvm.h +@@ -557,6 +557,8 @@ struct kvm_s390_mem_op { + #define KVM_S390_MEMOP_LOGICAL_WRITE 1 + #define KVM_S390_MEMOP_SIDA_READ 2 + #define KVM_S390_MEMOP_SIDA_WRITE 3 ++#define KVM_S390_MEMOP_ABSOLUTE_READ 4 ++#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 + /* flags for kvm_s390_mem_op->flags */ + #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) + #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) diff --git a/patches.suse/KVM-s390-Clarify-key-argument-for-MEM_OP-in-api-docs b/patches.suse/KVM-s390-Clarify-key-argument-for-MEM_OP-in-api-docs new file mode 100644 index 0000000..757a85d --- /dev/null +++ b/patches.suse/KVM-s390-Clarify-key-argument-for-MEM_OP-in-api-docs @@ -0,0 +1,30 @@ +From: Janis Schoetterl-Glausch +Date: Mon, 21 Feb 2022 15:36:57 +0100 +Subject: KVM: s390: Clarify key argument for MEM_OP in api docs +Git-commit: 
cbf9b8109d32a53395369c0dabde005cb8fa3852 +Patch-mainline: v5.18-rc1 +References: jsc#PED-579 + +Clarify that the key argument represents the access key, not the whole +storage key. + +Signed-off-by: Janis Schoetterl-Glausch +Link: https://lore.kernel.org/r/20220221143657.3712481-1-scgl@linux.ibm.com +Fixes: 5e35d0eb472b ("KVM: s390: Update api documentation for memop ioctl") +Signed-off-by: Christian Borntraeger +Acked-by: Petr Tesarik +--- + Documentation/virt/kvm/api.rst | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/Documentation/virt/kvm/api.rst ++++ b/Documentation/virt/kvm/api.rst +@@ -3541,7 +3541,7 @@ KVM_S390_MEMOP_F_INJECT_EXCEPTION is set + + If the KVM_S390_MEMOP_F_SKEY_PROTECTION flag is set, storage key + protection is also in effect and may cause exceptions if accesses are +-prohibited given the access key passed in "key". ++prohibited given the access key designated by "key"; the valid range is 0..15. + KVM_S390_MEMOP_F_SKEY_PROTECTION is available if KVM_CAP_S390_MEM_OP_EXTENSION + is > 0. + diff --git a/patches.suse/KVM-s390-Don-t-indicate-suppression-on-dirtying-failing-memop b/patches.suse/KVM-s390-Don-t-indicate-suppression-on-dirtying-failing-memop new file mode 100644 index 0000000..48949ee --- /dev/null +++ b/patches.suse/KVM-s390-Don-t-indicate-suppression-on-dirtying-failing-memop @@ -0,0 +1,101 @@ +From: Janis Schoetterl-Glausch +Date: Thu, 12 May 2022 15:10:17 +0200 +Subject: KVM: s390: Don't indicate suppression on dirtying, failing memop +Git-commit: c783631b0bffe6060113ff0aafe5fdbd71bea793 +Patch-mainline: v5.19-rc1 +References: jsc#PED-579 + +If user space uses a memop to emulate an instruction and that +memop fails, the execution of the instruction ends. +Instruction execution can end in different ways, one of which is +suppression, which requires that the instruction execute like a no-op. +A writing memop that spans multiple pages and fails due to key +protection may have modified guest memory, as a result, the likely +correct ending is termination. Therefore, do not indicate a +suppressing instruction ending in this case. + +Signed-off-by: Janis Schoetterl-Glausch +Reviewed-by: Christian Borntraeger +Reviewed-by: Claudio Imbrenda +Link: https://lore.kernel.org/r/20220512131019.2594948-2-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +Signed-off-by: Janosch Frank +Acked-by: Petr Tesarik +--- + Documentation/virt/kvm/api.rst | 6 ++++++ + arch/s390/kvm/gaccess.c | 22 ++++++++++++++++++---- + 2 files changed, 24 insertions(+), 4 deletions(-) + +--- a/Documentation/virt/kvm/api.rst ++++ b/Documentation/virt/kvm/api.rst +@@ -3538,12 +3538,18 @@ in case of KVM_S390_MEMOP_F_CHECK_ONLY), + error number indicating the type of exception. This exception is also + raised directly at the corresponding VCPU if the flag + KVM_S390_MEMOP_F_INJECT_EXCEPTION is set. ++On protection exceptions, unless specified otherwise, the injected ++translation-exception identifier (TEID) indicates suppression. + + If the KVM_S390_MEMOP_F_SKEY_PROTECTION flag is set, storage key + protection is also in effect and may cause exceptions if accesses are + prohibited given the access key designated by "key"; the valid range is 0..15. + KVM_S390_MEMOP_F_SKEY_PROTECTION is available if KVM_CAP_S390_MEM_OP_EXTENSION + is > 0. ++Since the accessed memory may span multiple pages and those pages might have ++different storage keys, it is possible that a protection exception occurs ++after memory has been modified. 
In this case, if the exception is injected, ++the TEID does not indicate suppression. + + Absolute read/write: + ^^^^^^^^^^^^^^^^^^^^ +--- a/arch/s390/kvm/gaccess.c ++++ b/arch/s390/kvm/gaccess.c +@@ -491,8 +491,8 @@ enum prot_type { + PROT_TYPE_IEP = 4, + }; + +-static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, +- u8 ar, enum gacc_mode mode, enum prot_type prot) ++static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar, ++ enum gacc_mode mode, enum prot_type prot, bool terminate) + { + struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; + struct trans_exc_code_bits *tec; +@@ -520,6 +520,11 @@ static int trans_exc(struct kvm_vcpu *vc + tec->b61 = 1; + break; + } ++ if (terminate) { ++ tec->b56 = 0; ++ tec->b60 = 0; ++ tec->b61 = 0; ++ } + fallthrough; + case PGM_ASCE_TYPE: + case PGM_PAGE_TRANSLATION: +@@ -552,6 +557,12 @@ static int trans_exc(struct kvm_vcpu *vc + return code; + } + ++static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar, ++ enum gacc_mode mode, enum prot_type prot) ++{ ++ return trans_exc_ending(vcpu, code, gva, ar, mode, prot, false); ++} ++ + static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, + unsigned long ga, u8 ar, enum gacc_mode mode) + { +@@ -1109,8 +1120,11 @@ int access_guest_with_key(struct kvm_vcp + data += fragment_len; + ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len); + } +- if (rc > 0) +- rc = trans_exc(vcpu, rc, ga, ar, mode, prot); ++ if (rc > 0) { ++ bool terminate = (mode == GACC_STORE) && (idx > 0); ++ ++ rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate); ++ } + out_unlock: + if (need_ipte_lock) + ipte_unlock(vcpu); diff --git a/patches.suse/KVM-s390-Fix-lockdep-issue-in-vm-memop b/patches.suse/KVM-s390-Fix-lockdep-issue-in-vm-memop new file mode 100644 index 0000000..0522cbd --- /dev/null +++ b/patches.suse/KVM-s390-Fix-lockdep-issue-in-vm-memop @@ -0,0 +1,52 @@ +From: Janis Schoetterl-Glausch +Date: Fri, 11 Mar 2022 18:00:40 +0100 +Subject: KVM: s390: Fix lockdep issue in vm memop +Git-commit: b5d1274409d0eec6d826f65d6dafebf9d77a1b99 +Patch-mainline: v5.18-rc6 +References: jsc#PED-579 + +Issuing a memop on a protected vm does not make sense, +neither is the memory readable/writable, nor does it make sense to check +storage keys. This is why the ioctl will return -EINVAL when it detects +the vm to be protected. However, in order to ensure that the vm cannot +become protected during the memop, the kvm->lock would need to be taken +for the duration of the ioctl. This is also required because +kvm_s390_pv_is_protected asserts that the lock must be held. +Instead, don't try to prevent this. If user space enables secure +execution concurrently with a memop it must accecpt the possibility of +the memop failing. +Still check if the vm is currently protected, but without locking and +consider it a heuristic. 
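The mechanical change, for reference (condensed from the hunk below): the accessor that asserts kvm->lock is replaced by a plain read of the PV handle, accepting the benign race described above.

	/* before: kvm_s390_pv_is_protected() asserts that kvm->lock is held */
	if (kvm_s390_pv_is_protected(kvm))
		return -EINVAL;

	/* after: unlocked heuristic; a stale answer is caught as -EFAULT on access */
	if (kvm_s390_pv_get_handle(kvm))
		return -EINVAL;
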
+ +Fixes: ef11c9463ae0 ("KVM: s390: Add vm IOCTL for key checked guest absolute memory access") +Signed-off-by: Janis Schoetterl-Glausch +Reviewed-by: Janosch Frank +Reviewed-by: Claudio Imbrenda +Link: https://lore.kernel.org/r/20220322153204.2637400-1-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +Signed-off-by: Heiko Carstens +Acked-by: Petr Tesarik +--- + arch/s390/kvm/kvm-s390.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/arch/s390/kvm/kvm-s390.c ++++ b/arch/s390/kvm/kvm-s390.c +@@ -2410,7 +2410,16 @@ static int kvm_s390_vm_mem_op(struct kvm + return -EINVAL; + if (mop->size > MEM_OP_MAX_SIZE) + return -E2BIG; +- if (kvm_s390_pv_is_protected(kvm)) ++ /* ++ * This is technically a heuristic only, if the kvm->lock is not ++ * taken, it is not guaranteed that the vm is/remains non-protected. ++ * This is ok from a kernel perspective, wrongdoing is detected ++ * on the access, -EFAULT is returned and the vm may crash the ++ * next time it accesses the memory in question. ++ * There is no sane usecase to do switching and a memop on two ++ * different CPUs at the same time. ++ */ ++ if (kvm_s390_pv_get_handle(kvm)) + return -EINVAL; + if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { + if (access_key_invalid(mop->key)) diff --git a/patches.suse/KVM-s390-Function-documentation-fixes b/patches.suse/KVM-s390-Function-documentation-fixes new file mode 100644 index 0000000..d864924 --- /dev/null +++ b/patches.suse/KVM-s390-Function-documentation-fixes @@ -0,0 +1,83 @@ +From: Janosch Frank +Date: Fri, 10 Sep 2021 08:04:20 +0000 +Subject: KVM: s390: Function documentation fixes +Git-commit: 25b5476a294cd5f7c7730f334f6b400d30bb783d +Patch-mainline: v5.15-rc7 +References: jsc#PED-579 + +The latest compile changes pointed us to a few instances where we use +the kernel documentation style but don't explain all variables or +don't adhere to it 100%. + +It's easy to fix so let's do that. + +Signed-off-by: Janosch Frank +Signed-off-by: Christian Borntraeger +Acked-by: Petr Tesarik +--- + arch/s390/kvm/gaccess.c | 12 ++++++++++++ + arch/s390/kvm/intercept.c | 4 +++- + 2 files changed, 15 insertions(+), 1 deletion(-) + +--- a/arch/s390/kvm/gaccess.c ++++ b/arch/s390/kvm/gaccess.c +@@ -894,6 +894,11 @@ int access_guest_real(struct kvm_vcpu *v + + /** + * guest_translate_address - translate guest logical into guest absolute address ++ * @vcpu: virtual cpu ++ * @gva: Guest virtual address ++ * @ar: Access register ++ * @gpa: Guest physical address ++ * @mode: Translation access mode + * + * Parameter semantics are the same as the ones from guest_translate. + * The memory contents at the guest address are not changed. 
+@@ -934,6 +939,11 @@ int guest_translate_address(struct kvm_v + + /** + * check_gva_range - test a range of guest virtual addresses for accessibility ++ * @vcpu: virtual cpu ++ * @gva: Guest virtual address ++ * @ar: Access register ++ * @length: Length of test range ++ * @mode: Translation access mode + */ + int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, + unsigned long length, enum gacc_mode mode) +@@ -956,6 +966,7 @@ int check_gva_range(struct kvm_vcpu *vcp + + /** + * kvm_s390_check_low_addr_prot_real - check for low-address protection ++ * @vcpu: virtual cpu + * @gra: Guest real address + * + * Checks whether an address is subject to low-address protection and set +@@ -979,6 +990,7 @@ int kvm_s390_check_low_addr_prot_real(st + * @pgt: pointer to the beginning of the page table for the given address if + * successful (return value 0), or to the first invalid DAT entry in + * case of exceptions (return value > 0) ++ * @dat_protection: referenced memory is write protected + * @fake: pgt references contiguous guest memory block, not a pgtable + */ + static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, +--- a/arch/s390/kvm/intercept.c ++++ b/arch/s390/kvm/intercept.c +@@ -269,6 +269,7 @@ static int handle_prog(struct kvm_vcpu * + + /** + * handle_external_interrupt - used for external interruption interceptions ++ * @vcpu: virtual cpu + * + * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if + * the new PSW does not have external interrupts disabled. In the first case, +@@ -315,7 +316,8 @@ static int handle_external_interrupt(str + } + + /** +- * Handle MOVE PAGE partial execution interception. ++ * handle_mvpg_pei - Handle MOVE PAGE partial execution interception. ++ * @vcpu: virtual cpu + * + * This interception can only happen for guests with DAT disabled and + * addresses that are currently not mapped in the host. Thus we try to diff --git a/patches.suse/KVM-s390-Honor-storage-keys-when-accessing-guest-memory b/patches.suse/KVM-s390-Honor-storage-keys-when-accessing-guest-memory new file mode 100644 index 0000000..237fbb6 --- /dev/null +++ b/patches.suse/KVM-s390-Honor-storage-keys-when-accessing-guest-memory @@ -0,0 +1,535 @@ +From: Janis Schoetterl-Glausch +Date: Fri, 11 Feb 2022 19:22:07 +0100 +Subject: KVM: s390: Honor storage keys when accessing guest memory +Git-commit: e613d83454d7da1c37d78edb278db9c20afb21a2 +Patch-mainline: v5.18-rc1 +References: jsc#PED-579 + +Storage key checking had not been implemented for instructions emulated +by KVM. Implement it by enhancing the functions used for guest access, +in particular those making use of access_guest which has been renamed +to access_guest_with_key. +Accesses via access_guest_real should not be key checked. + +For actual accesses, key checking is done by +copy_from/to_user_key (which internally uses MVCOS/MVCP/MVCS). +In cases where accessibility is checked without an actual access, +this is performed by getting the storage key and checking if the access +key matches. In both cases, if applicable, storage and fetch protection +override are honored. 
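The matching rule at the core of these checks, condensed into a single predicate (illustrative; the real vcpu_check_access_key()/vm_check_access_key() in the hunks below additionally look up the storage key via get_guest_storage_key() and honor the fetch- and storage-protection overrides):

	/* May an access under access_key touch memory guarded by storage_key? */
	static bool key_allows_access(u8 access_key, u8 storage_key, bool is_fetch)
	{
		u8 acc = FIELD_GET(_PAGE_ACC_BITS, storage_key);

		if (access_key == 0)	/* access key 0 matches any storage key */
			return true;
		if (access_key == acc)	/* access key matches the ACC bits */
			return true;
		/* a fetch is additionally allowed when fetch protection is off */
		return is_fetch && !(storage_key & _PAGE_FP_BIT);
	}
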
+ +Signed-off-by: Janis Schoetterl-Glausch +Reviewed-by: Janosch Frank +Reviewed-by: Christian Borntraeger +Link: https://lore.kernel.org/r/20220211182215.2730017-3-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +Acked-by: Petr Tesarik +--- + arch/s390/include/asm/ctl_reg.h | 2 + arch/s390/include/asm/page.h | 2 + arch/s390/kvm/gaccess.c | 187 +++++++++++++++++++++++++++++++++++++--- + arch/s390/kvm/gaccess.h | 77 ++++++++++++++-- + arch/s390/kvm/intercept.c | 12 +- + arch/s390/kvm/kvm-s390.c | 4 + 6 files changed, 253 insertions(+), 31 deletions(-) + +--- a/arch/s390/include/asm/ctl_reg.h ++++ b/arch/s390/include/asm/ctl_reg.h +@@ -12,6 +12,8 @@ + + #define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10) + #define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35) ++#define CR0_FETCH_PROTECTION_OVERRIDE BIT(63 - 38) ++#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(63 - 39) + #define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49) + #define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50) + #define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52) +--- a/arch/s390/include/asm/page.h ++++ b/arch/s390/include/asm/page.h +@@ -20,6 +20,8 @@ + #define PAGE_SIZE _PAGE_SIZE + #define PAGE_MASK _PAGE_MASK + #define PAGE_DEFAULT_ACC 0 ++/* storage-protection override */ ++#define PAGE_SPO_ACC 9 + #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) + + #define HPAGE_SHIFT 20 +--- a/arch/s390/kvm/gaccess.c ++++ b/arch/s390/kvm/gaccess.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + #include + #include "kvm-s390.h" +@@ -794,6 +795,79 @@ static int low_address_protection_enable + return 1; + } + ++static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode, ++ union asce asce) ++{ ++ psw_t *psw = &vcpu->arch.sie_block->gpsw; ++ unsigned long override; ++ ++ if (mode == GACC_FETCH || mode == GACC_IFETCH) { ++ /* check if fetch protection override enabled */ ++ override = vcpu->arch.sie_block->gcr[0]; ++ override &= CR0_FETCH_PROTECTION_OVERRIDE; ++ /* not applicable if subject to DAT && private space */ ++ override = override && !(psw_bits(*psw).dat && asce.p); ++ return override; ++ } ++ return false; ++} ++ ++static bool fetch_prot_override_applies(unsigned long ga, unsigned int len) ++{ ++ return ga < 2048 && ga + len <= 2048; ++} ++ ++static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu) ++{ ++ /* check if storage protection override enabled */ ++ return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE; ++} ++ ++static bool storage_prot_override_applies(u8 access_control) ++{ ++ /* matches special storage protection override key (9) -> allow */ ++ return access_control == PAGE_SPO_ACC; ++} ++ ++static int vcpu_check_access_key(struct kvm_vcpu *vcpu, u8 access_key, ++ enum gacc_mode mode, union asce asce, gpa_t gpa, ++ unsigned long ga, unsigned int len) ++{ ++ u8 storage_key, access_control; ++ unsigned long hva; ++ int r; ++ ++ /* access key 0 matches any storage key -> allow */ ++ if (access_key == 0) ++ return 0; ++ /* ++ * caller needs to ensure that gfn is accessible, so we can ++ * assume that this cannot fail ++ */ ++ hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa)); ++ mmap_read_lock(current->mm); ++ r = get_guest_storage_key(current->mm, hva, &storage_key); ++ mmap_read_unlock(current->mm); ++ if (r) ++ return r; ++ access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key); ++ /* access key matches storage key -> allow */ ++ if (access_control == access_key) ++ return 0; ++ if (mode == GACC_FETCH || mode == GACC_IFETCH) { ++ /* it is a fetch and fetch protection 
is off -> allow */ ++ if (!(storage_key & _PAGE_FP_BIT)) ++ return 0; ++ if (fetch_prot_override_applicable(vcpu, mode, asce) && ++ fetch_prot_override_applies(ga, len)) ++ return 0; ++ } ++ if (storage_prot_override_applicable(vcpu) && ++ storage_prot_override_applies(access_control)) ++ return 0; ++ return PGM_PROTECTION; ++} ++ + /** + * guest_range_to_gpas() - Calculate guest physical addresses of page fragments + * covering a logical range +@@ -804,6 +878,7 @@ static int low_address_protection_enable + * @len: length of range in bytes + * @asce: address-space-control element to use for translation + * @mode: access mode ++ * @access_key: access key to mach the range's storage keys against + * + * Translate a logical range to a series of guest absolute addresses, + * such that the concatenation of page fragments starting at each gpa make up +@@ -830,7 +905,8 @@ static int low_address_protection_enable + */ + static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, + unsigned long *gpas, unsigned long len, +- const union asce asce, enum gacc_mode mode) ++ const union asce asce, enum gacc_mode mode, ++ u8 access_key) + { + psw_t *psw = &vcpu->arch.sie_block->gpsw; + unsigned int offset = offset_in_page(ga); +@@ -857,6 +933,10 @@ static int guest_range_to_gpas(struct kv + } + if (rc) + return trans_exc(vcpu, rc, ga, ar, mode, prot); ++ rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga, ++ fragment_len); ++ if (rc) ++ return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC); + if (gpas) + *gpas++ = gpa; + offset = 0; +@@ -880,16 +960,54 @@ static int access_guest_page(struct kvm + return rc; + } + +-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, +- unsigned long len, enum gacc_mode mode) ++static int ++access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, ++ void *data, unsigned int len, u8 access_key) ++{ ++ struct kvm_memory_slot *slot; ++ bool writable; ++ gfn_t gfn; ++ hva_t hva; ++ int rc; ++ ++ gfn = gpa >> PAGE_SHIFT; ++ slot = gfn_to_memslot(kvm, gfn); ++ hva = gfn_to_hva_memslot_prot(slot, gfn, &writable); ++ ++ if (kvm_is_error_hva(hva)) ++ return PGM_ADDRESSING; ++ /* ++ * Check if it's a ro memslot, even tho that can't occur (they're unsupported). ++ * Don't try to actually handle that case. 
++ */ ++ if (!writable && mode == GACC_STORE) ++ return -EOPNOTSUPP; ++ hva += offset_in_page(gpa); ++ if (mode == GACC_STORE) ++ rc = copy_to_user_key((void __user *)hva, data, len, access_key); ++ else ++ rc = copy_from_user_key(data, (void __user *)hva, len, access_key); ++ if (rc) ++ return PGM_PROTECTION; ++ if (mode == GACC_STORE) ++ mark_page_dirty_in_slot(kvm, slot, gfn); ++ return 0; ++} ++ ++int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, ++ void *data, unsigned long len, enum gacc_mode mode, ++ u8 access_key) + { + psw_t *psw = &vcpu->arch.sie_block->gpsw; + unsigned long nr_pages, idx; + unsigned long gpa_array[2]; + unsigned int fragment_len; + unsigned long *gpas; ++ enum prot_type prot; + int need_ipte_lock; + union asce asce; ++ bool try_storage_prot_override; ++ bool try_fetch_prot_override; + int rc; + + if (!len) +@@ -904,16 +1022,47 @@ int access_guest(struct kvm_vcpu *vcpu, + gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long))); + if (!gpas) + return -ENOMEM; ++ try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce); ++ try_storage_prot_override = storage_prot_override_applicable(vcpu); + need_ipte_lock = psw_bits(*psw).dat && !asce.r; + if (need_ipte_lock) + ipte_lock(vcpu); +- rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode); +- for (idx = 0; idx < nr_pages && !rc; idx++) { ++ /* ++ * Since we do the access further down ultimately via a move instruction ++ * that does key checking and returns an error in case of a protection ++ * violation, we don't need to do the check during address translation. ++ * Skip it by passing access key 0, which matches any storage key, ++ * obviating the need for any further checks. As a result the check is ++ * handled entirely in hardware on access, we only need to take care to ++ * forego key protection checking if fetch protection override applies or ++ * retry with the special key 9 in case of storage protection override. 
++ */
++ rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
++ if (rc)
++ goto out_unlock;
++ for (idx = 0; idx < nr_pages; idx++) {
+ fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
+- rc = access_guest_page(vcpu->kvm, mode, gpas[idx], data, fragment_len);
++ if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) {
++ rc = access_guest_page(vcpu->kvm, mode, gpas[idx],
++ data, fragment_len);
++ } else {
++ rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
++ data, fragment_len, access_key);
++ }
++ if (rc == PGM_PROTECTION && try_storage_prot_override)
++ rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
++ data, fragment_len, PAGE_SPO_ACC);
++ if (rc == PGM_PROTECTION)
++ prot = PROT_TYPE_KEYC;
++ if (rc)
++ break;
+ len -= fragment_len;
+ data += fragment_len;
++ ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len);
+ }
++ if (rc > 0)
++ rc = trans_exc(vcpu, rc, ga, ar, mode, prot);
++out_unlock:
+ if (need_ipte_lock)
+ ipte_unlock(vcpu);
+ if (nr_pages > ARRAY_SIZE(gpa_array))
+@@ -940,12 +1089,13 @@ int access_guest_real(struct kvm_vcpu *v
+ }
+
+ /**
+- * guest_translate_address - translate guest logical into guest absolute address
++ * guest_translate_address_with_key - translate guest logical into guest absolute address
+ * @vcpu: virtual cpu
+ * @gva: Guest virtual address
+ * @ar: Access register
+ * @gpa: Guest physical address
+ * @mode: Translation access mode
++ * @access_key: access key to match the storage key with
+ *
+ * Parameter semantics are the same as the ones from guest_translate.
+ * The memory contents at the guest address are not changed.
+@@ -953,8 +1103,9 @@ int access_guest_real(struct kvm_v
+ * Note: The IPTE lock is not taken during this function, so the caller
+ * has to take care of this.
+ */
+-int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+- unsigned long *gpa, enum gacc_mode mode)
++int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
++ unsigned long *gpa, enum gacc_mode mode,
++ u8 access_key)
+ {
+ union asce asce;
+ int rc;
+
+@@ -963,7 +1114,17 @@ int guest_translate_address(struct kvm_v
+ rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
+ if (rc)
+ return rc;
+- return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode);
++ return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
++ access_key);
++}
++
++int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
++ unsigned long *gpa, enum gacc_mode mode)
++{
++ u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
++
++ return guest_translate_address_with_key(vcpu, gva, ar, gpa, mode,
++ access_key);
+ }
+
+ /**
+@@ -973,9 +1134,10 @@ int guest_translate_address(struct kvm_v
+ * @ar: Access register
+ * @length: Length of test range
+ * @mode: Translation access mode
++ * @access_key: access key to match the storage keys with
+ */
+ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+- unsigned long length, enum gacc_mode mode)
++ unsigned long length, enum gacc_mode mode, u8 access_key)
+ {
+ union asce asce;
+ int rc = 0;
+
+@@ -984,7 +1146,8 @@ int check_gva_range(struct kvm_vcp
+ if (rc)
+ return rc;
+ ipte_lock(vcpu);
+- rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode);
++ rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
++ access_key);
+ ipte_unlock(vcpu);
+
+ return rc;
+--- a/arch/s390/kvm/gaccess.h
++++ b/arch/s390/kvm/gaccess.h
+@@ -186,24 +186,31 @@ enum gacc_mode {
+ GACC_IFETCH,
+ };
+
++int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
++ unsigned long *gpa, enum gacc_mode mode,
++ u8 access_key);
++
+ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+ u8 ar, unsigned long *gpa, enum gacc_mode mode);
++
+ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
+- unsigned long length, enum gacc_mode mode);
++ unsigned long length, enum gacc_mode mode, u8 access_key);
+
+-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
+- unsigned long len, enum gacc_mode mode);
++int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
++ void *data, unsigned long len, enum gacc_mode mode,
++ u8 access_key);
+
+ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+ void *data, unsigned long len, enum gacc_mode mode);
+
+ /**
+- * write_guest - copy data from kernel space to guest space
++ * write_guest_with_key - copy data from kernel space to guest space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @ar: access register
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
++ * @access_key: access key the storage key needs to match
+ *
+ * Copy @len bytes from @data (kernel space) to @ga (guest address).
+ * In order to copy data to guest space the PSW of the vcpu is inspected:
+@@ -214,8 +221,8 @@ int access_guest_real(struct kvm_vcpu *v
+ * The addressing mode of the PSW is also inspected, so that address wrap
+ * around is taken into account for 24-, 31- and 64-bit addressing mode,
+ * if the to be copied data crosses page boundaries in guest address space.
+- * In addition also low address and DAT protection are inspected before
+- * copying any data (key protection is currently not implemented).
++ * In addition low address, DAT and key protection checks are performed before ++ * copying any data. + * + * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu. + * In case of an access exception (e.g. protection exception) pgm will contain +@@ -243,10 +250,53 @@ int access_guest_real(struct kvm_vcpu *v + * if data has been changed in guest space in case of an exception. + */ + static inline __must_check ++int write_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, ++ void *data, unsigned long len, u8 access_key) ++{ ++ return access_guest_with_key(vcpu, ga, ar, data, len, GACC_STORE, ++ access_key); ++} ++ ++/** ++ * write_guest - copy data from kernel space to guest space ++ * @vcpu: virtual cpu ++ * @ga: guest address ++ * @ar: access register ++ * @data: source address in kernel space ++ * @len: number of bytes to copy ++ * ++ * The behaviour of write_guest is identical to write_guest_with_key, except ++ * that the PSW access key is used instead of an explicit argument. ++ */ ++static inline __must_check + int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, + unsigned long len) + { +- return access_guest(vcpu, ga, ar, data, len, GACC_STORE); ++ u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key; ++ ++ return write_guest_with_key(vcpu, ga, ar, data, len, access_key); ++} ++ ++/** ++ * read_guest_with_key - copy data from guest space to kernel space ++ * @vcpu: virtual cpu ++ * @ga: guest address ++ * @ar: access register ++ * @data: destination address in kernel space ++ * @len: number of bytes to copy ++ * @access_key: access key the storage key needs to match ++ * ++ * Copy @len bytes from @ga (guest address) to @data (kernel space). ++ * ++ * The behaviour of read_guest_with_key is identical to write_guest_with_key, ++ * except that data will be copied from guest space to kernel space. ++ */ ++static inline __must_check ++int read_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, ++ void *data, unsigned long len, u8 access_key) ++{ ++ return access_guest_with_key(vcpu, ga, ar, data, len, GACC_FETCH, ++ access_key); + } + + /** +@@ -259,14 +309,16 @@ int write_guest(struct kvm_vcpu *vcpu, u + * + * Copy @len bytes from @ga (guest address) to @data (kernel space). + * +- * The behaviour of read_guest is identical to write_guest, except that +- * data will be copied from guest space to kernel space. ++ * The behaviour of read_guest is identical to read_guest_with_key, except ++ * that the PSW access key is used instead of an explicit argument. 
+ */
+ static inline __must_check
+ int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data,
+ unsigned long len)
+ {
+- return access_guest(vcpu, ga, ar, data, len, GACC_FETCH);
++ u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
++
++ return read_guest_with_key(vcpu, ga, ar, data, len, access_key);
+ }
+
+ /**
+@@ -287,7 +339,10 @@ static inline __must_check
+ int read_guest_instr(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+ unsigned long len)
+ {
+- return access_guest(vcpu, ga, 0, data, len, GACC_IFETCH);
++ u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key;
++
++ return access_guest_with_key(vcpu, ga, 0, data, len, GACC_IFETCH,
++ access_key);
+ }
+
+ /**
+--- a/arch/s390/kvm/intercept.c
++++ b/arch/s390/kvm/intercept.c
+@@ -331,18 +331,18 @@ static int handle_mvpg_pei(struct kvm_vc
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+- /* Make sure that the source is paged-in */
+- rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
+- reg2, &srcaddr, GACC_FETCH);
++ /* Ensure that the source is paged-in, no actual access -> no key checking */
++ rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg2],
++ reg2, &srcaddr, GACC_FETCH, 0);
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
+ if (rc != 0)
+ return rc;
+
+- /* Make sure that the destination is paged-in */
+- rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
+- reg1, &dstaddr, GACC_STORE);
++ /* Ensure that the destination is paged-in, no actual access -> no key checking */
++ rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg1],
++ reg1, &dstaddr, GACC_STORE, 0);
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -4734,7 +4734,7 @@ static long kvm_s390_guest_mem_op(struct
+ case KVM_S390_MEMOP_LOGICAL_READ:
+ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar,
+- mop->size, GACC_FETCH);
++ mop->size, GACC_FETCH, 0);
+ break;
+ }
+ r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+@@ -4746,7 +4746,7 @@ static long kvm_s390_guest_mem_op(struct
+ case KVM_S390_MEMOP_LOGICAL_WRITE:
+ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar,
+- mop->size, GACC_STORE);
++ mop->size, GACC_STORE, 0);
+ break;
+ }
+ if (copy_from_user(tmpbuf, uaddr, mop->size)) {
diff --git a/patches.suse/KVM-s390-Rename-existing-vcpu-memop-functions b/patches.suse/KVM-s390-Rename-existing-vcpu-memop-functions
new file mode 100644
index 0000000..01b876f
--- /dev/null
+++ b/patches.suse/KVM-s390-Rename-existing-vcpu-memop-functions
@@ -0,0 +1,79 @@
+From: Janis Schoetterl-Glausch
+Date: Fri, 11 Feb 2022 19:22:12 +0100
+Subject: KVM: s390: Rename existing vcpu memop functions
+Git-commit: 0e1234c02b77ef22d9cf78f86b98347ceb170090
+Patch-mainline: v5.18-rc1
+References: jsc#PED-579
+
+Makes the naming consistent, now that we also have a vm ioctl.
+ +Signed-off-by: Janis Schoetterl-Glausch +Reviewed-by: Janosch Frank +Reviewed-by: Claudio Imbrenda +Link: https://lore.kernel.org/r/20220211182215.2730017-8-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +Acked-by: Petr Tesarik +--- + arch/s390/kvm/kvm-s390.c | 19 ++++++++++--------- + 1 file changed, 10 insertions(+), 9 deletions(-) + +--- a/arch/s390/kvm/kvm-s390.c ++++ b/arch/s390/kvm/kvm-s390.c +@@ -4762,8 +4762,8 @@ static int kvm_vcpu_ioctl_enable_cap(str + return r; + } + +-static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu, +- struct kvm_s390_mem_op *mop) ++static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, ++ struct kvm_s390_mem_op *mop) + { + void __user *uaddr = (void __user *)mop->buf; + int r = 0; +@@ -4792,8 +4792,9 @@ static long kvm_s390_guest_sida_op(struc + } + return r; + } +-static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, +- struct kvm_s390_mem_op *mop) ++ ++static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu, ++ struct kvm_s390_mem_op *mop) + { + void __user *uaddr = (void __user *)mop->buf; + void *tmpbuf = NULL; +@@ -4856,8 +4857,8 @@ static long kvm_s390_guest_mem_op(struct + return r; + } + +-static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu, +- struct kvm_s390_mem_op *mop) ++static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu, ++ struct kvm_s390_mem_op *mop) + { + int r, srcu_idx; + +@@ -4866,12 +4867,12 @@ static long kvm_s390_guest_memsida_op(st + switch (mop->op) { + case KVM_S390_MEMOP_LOGICAL_READ: + case KVM_S390_MEMOP_LOGICAL_WRITE: +- r = kvm_s390_guest_mem_op(vcpu, mop); ++ r = kvm_s390_vcpu_mem_op(vcpu, mop); + break; + case KVM_S390_MEMOP_SIDA_READ: + case KVM_S390_MEMOP_SIDA_WRITE: + /* we are locked against sida going away by the vcpu->mutex */ +- r = kvm_s390_guest_sida_op(vcpu, mop); ++ r = kvm_s390_vcpu_sida_op(vcpu, mop); + break; + default: + r = -EINVAL; +@@ -5034,7 +5035,7 @@ long kvm_arch_vcpu_ioctl(struct file *fi + struct kvm_s390_mem_op mem_op; + + if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) +- r = kvm_s390_guest_memsida_op(vcpu, &mem_op); ++ r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op); + else + r = -EFAULT; + break; diff --git a/patches.suse/KVM-s390-Update-api-documentation-for-memop-ioctl b/patches.suse/KVM-s390-Update-api-documentation-for-memop-ioctl new file mode 100644 index 0000000..df3cec2 --- /dev/null +++ b/patches.suse/KVM-s390-Update-api-documentation-for-memop-ioctl @@ -0,0 +1,175 @@ +From: Janis Schoetterl-Glausch +Date: Fri, 11 Feb 2022 19:22:14 +0100 +Subject: KVM: s390: Update api documentation for memop ioctl +Git-commit: 5e35d0eb472b48ac9c8ef7017753b8a1f765aa01 +Patch-mainline: v5.18-rc1 +References: jsc#PED-579 + +Document all currently existing operations, flags and explain under +which circumstances they are available. Document the recently +introduced absolute operations and the storage key protection flag, +as well as the existing SIDA operations. 
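As orientation for the documentation changes below, a minimal userspace sketch
of the vcpu memop with storage key checking (not part of the patch; vcpu_fd,
gaddr, buf and access_key are hypothetical caller-supplied values)::

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Logical read through the vcpu ioctl with key protection in effect.
     * Returns 0 on success, a positive program interruption code if the
     * access would raise an exception, and < 0 on generic errors. */
    static int read_guest_logical(int vcpu_fd, uint64_t gaddr, void *buf,
                                  uint32_t size, uint8_t access_key)
    {
            struct kvm_s390_mem_op op;

            memset(&op, 0, sizeof(op));
            op.gaddr = gaddr;
            op.size = size;
            op.op = KVM_S390_MEMOP_LOGICAL_READ;
            op.buf = (uint64_t)(uintptr_t)buf;
            op.ar = 0;
            op.key = access_key;
            op.flags = KVM_S390_MEMOP_F_SKEY_PROTECTION;

            return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
    }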
+
+Signed-off-by: Janis Schoetterl-Glausch
+Reviewed-by: Janosch Frank
+Link: https://lore.kernel.org/r/20220211182215.2730017-10-scgl@linux.ibm.com
+Signed-off-by: Christian Borntraeger
+Acked-by: Petr Tesarik
+---
+ Documentation/virt/kvm/api.rst | 112 ++++++++++++++++++++++++++++---------
+ include/uapi/linux/kvm.h | 2
+ 2 files changed, 91 insertions(+), 23 deletions(-)
+
+--- a/Documentation/virt/kvm/api.rst
++++ b/Documentation/virt/kvm/api.rst
+@@ -3461,15 +3461,17 @@ The fields in each entry are defined as
+ 4.89 KVM_S390_MEM_OP
+ --------------------
+
+-:Capability: KVM_CAP_S390_MEM_OP
++:Capability: KVM_CAP_S390_MEM_OP, KVM_CAP_S390_PROTECTED, KVM_CAP_S390_MEM_OP_EXTENSION
+ :Architectures: s390
+-:Type: vcpu ioctl
++:Type: vm ioctl, vcpu ioctl
+ :Parameters: struct kvm_s390_mem_op (in)
+ :Returns: = 0 on success,
+ < 0 on generic error (e.g. -EFAULT or -ENOMEM),
+ > 0 if an exception occurred while walking the page tables
+
+-Read or write data from/to the logical (virtual) memory of a VCPU.
++Read or write data from/to the VM's memory.
++The KVM_CAP_S390_MEM_OP_EXTENSION capability specifies what functionality is
++supported.
+
+ Parameters are specified via the following structure::
+
+ struct kvm_s390_mem_op {
+ __u64 gaddr; /* the guest address */
+ __u64 flags; /* flags */
+ __u32 size; /* amount of bytes */
+ __u32 op; /* type of operation */
+ __u64 buf; /* buffer in userspace */
+- __u8 ar; /* the access register number */
+- __u8 reserved[31]; /* should be set to 0 */
++ union {
++ struct {
++ __u8 ar; /* the access register number */
++ __u8 key; /* access key, ignored if flag unset */
++ };
++ __u32 sida_offset; /* offset into the sida */
++ __u8 reserved[32]; /* ignored */
++ };
+ };
+
+-The type of operation is specified in the "op" field. It is either
+-KVM_S390_MEMOP_LOGICAL_READ for reading from logical memory space or
+-KVM_S390_MEMOP_LOGICAL_WRITE for writing to logical memory space. The
+-KVM_S390_MEMOP_F_CHECK_ONLY flag can be set in the "flags" field to check
+-whether the corresponding memory access would create an access exception
+-(without touching the data in the memory at the destination). In case an
+-access exception occurred while walking the MMU tables of the guest, the
+-ioctl returns a positive error number to indicate the type of exception.
+-This exception is also raised directly at the corresponding VCPU if the
+-flag KVM_S390_MEMOP_F_INJECT_EXCEPTION is set in the "flags" field.
+-
+ The start address of the memory region has to be specified in the "gaddr"
+ field, and the length of the region in the "size" field (which must not
+ be 0). The maximum value for "size" can be obtained by checking the
+ KVM_CAP_S390_MEM_OP capability. "buf" is the buffer supplied by the
+ userspace application where the read data should be written to for
+-KVM_S390_MEMOP_LOGICAL_READ, or where the data that should be written is
+-stored for a KVM_S390_MEMOP_LOGICAL_WRITE. When KVM_S390_MEMOP_F_CHECK_ONLY
+-is specified, "buf" is unused and can be NULL. "ar" designates the access
+-register number to be used; the valid range is 0..15.
++a read access, or where the data that should be written is stored for
++a write access. The "reserved" field is meant for future extensions.
++Reserved and unused values are ignored. Future extensions that add members must
++introduce new flags.
++
++The type of operation is specified in the "op" field. Flags modifying
++their behavior can be set in the "flags" field. Undefined flag bits must
++be set to 0.
++ ++Possible operations are: ++ * ``KVM_S390_MEMOP_LOGICAL_READ`` ++ * ``KVM_S390_MEMOP_LOGICAL_WRITE`` ++ * ``KVM_S390_MEMOP_ABSOLUTE_READ`` ++ * ``KVM_S390_MEMOP_ABSOLUTE_WRITE`` ++ * ``KVM_S390_MEMOP_SIDA_READ`` ++ * ``KVM_S390_MEMOP_SIDA_WRITE`` ++ ++Logical read/write: ++^^^^^^^^^^^^^^^^^^^ ++ ++Access logical memory, i.e. translate the given guest address to an absolute ++address given the state of the VCPU and use the absolute address as target of ++the access. "ar" designates the access register number to be used; the valid ++range is 0..15. ++Logical accesses are permitted for the VCPU ioctl only. ++Logical accesses are permitted for non-protected guests only. ++ ++Supported flags: ++ * ``KVM_S390_MEMOP_F_CHECK_ONLY`` ++ * ``KVM_S390_MEMOP_F_INJECT_EXCEPTION`` ++ * ``KVM_S390_MEMOP_F_SKEY_PROTECTION`` ++ ++The KVM_S390_MEMOP_F_CHECK_ONLY flag can be set to check whether the ++corresponding memory access would cause an access exception; however, ++no actual access to the data in memory at the destination is performed. ++In this case, "buf" is unused and can be NULL. ++ ++In case an access exception occurred during the access (or would occur ++in case of KVM_S390_MEMOP_F_CHECK_ONLY), the ioctl returns a positive ++error number indicating the type of exception. This exception is also ++raised directly at the corresponding VCPU if the flag ++KVM_S390_MEMOP_F_INJECT_EXCEPTION is set. ++ ++If the KVM_S390_MEMOP_F_SKEY_PROTECTION flag is set, storage key ++protection is also in effect and may cause exceptions if accesses are ++prohibited given the access key passed in "key". ++KVM_S390_MEMOP_F_SKEY_PROTECTION is available if KVM_CAP_S390_MEM_OP_EXTENSION ++is > 0. ++ ++Absolute read/write: ++^^^^^^^^^^^^^^^^^^^^ ++ ++Access absolute memory. This operation is intended to be used with the ++KVM_S390_MEMOP_F_SKEY_PROTECTION flag, to allow accessing memory and performing ++the checks required for storage key protection as one operation (as opposed to ++user space getting the storage keys, performing the checks, and accessing ++memory thereafter, which could lead to a delay between check and access). ++Absolute accesses are permitted for the VM ioctl if KVM_CAP_S390_MEM_OP_EXTENSION ++is > 0. ++Currently absolute accesses are not permitted for VCPU ioctls. ++Absolute accesses are permitted for non-protected guests only. ++ ++Supported flags: ++ * ``KVM_S390_MEMOP_F_CHECK_ONLY`` ++ * ``KVM_S390_MEMOP_F_SKEY_PROTECTION`` ++ ++The semantics of the flags are as for logical accesses. ++ ++SIDA read/write: ++^^^^^^^^^^^^^^^^ ++ ++Access the secure instruction data area which contains memory operands necessary ++for instruction emulation for protected guests. ++SIDA accesses are available if the KVM_CAP_S390_PROTECTED capability is available. ++SIDA accesses are permitted for the VCPU ioctl only. ++SIDA accesses are permitted for protected guests only. + +-The "reserved" field is meant for future extensions. It is not used by +-KVM with the currently defined set of flags. ++No flags are supported. 
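The absolute operations described above pair naturally with check-only
probing. A sketch (not part of the patch; vm_fd, gpa and access_key are
assumed to be supplied by the caller, with KVM_CAP_S390_MEM_OP_EXTENSION
having reported > 0)::

    /* Probe whether a guest-absolute page is writable under the given
     * access key, without touching its contents. */
    struct kvm_s390_mem_op op = {
            .gaddr = gpa,
            .size = 4096,
            .op = KVM_S390_MEMOP_ABSOLUTE_WRITE,
            .buf = 0, /* unused with CHECK_ONLY */
            .key = access_key,
            .flags = KVM_S390_MEMOP_F_CHECK_ONLY |
                     KVM_S390_MEMOP_F_SKEY_PROTECTION,
    };
    int rc = ioctl(vm_fd, KVM_S390_MEM_OP, &op); /* > 0: exception code */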
+ + 4.90 KVM_S390_GET_SKEYS + ----------------------- +--- a/include/uapi/linux/kvm.h ++++ b/include/uapi/linux/kvm.h +@@ -549,7 +549,7 @@ struct kvm_s390_mem_op { + __u8 key; /* access key, ignored if flag unset */ + }; + __u32 sida_offset; /* offset into the sida */ +- __u8 reserved[32]; /* should be set to 0 */ ++ __u8 reserved[32]; /* ignored */ + }; + }; + /* types for kvm_s390_mem_op->op */ diff --git a/patches.suse/KVM-s390-gaccess-Cleanup-access-to-guest-pages b/patches.suse/KVM-s390-gaccess-Cleanup-access-to-guest-pages new file mode 100644 index 0000000..b698794 --- /dev/null +++ b/patches.suse/KVM-s390-gaccess-Cleanup-access-to-guest-pages @@ -0,0 +1,67 @@ +From: Janis Schoetterl-Glausch +Date: Fri, 26 Nov 2021 17:45:49 +0100 +Subject: KVM: s390: gaccess: Cleanup access to guest pages +Git-commit: bad13799e0305deb258372b7298a86be4c78aaba +Patch-mainline: v5.17-rc1 +References: jsc#PED-579 + +Introduce a helper function for guest frame access. + +Signed-off-by: Janis Schoetterl-Glausch +Reviewed-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Claudio Imbrenda +Message-Id: <20211126164549.7046-4-scgl@linux.ibm.com> +Signed-off-by: Janosch Frank +Acked-by: Petr Tesarik +--- + arch/s390/kvm/gaccess.c | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) + +--- a/arch/s390/kvm/gaccess.c ++++ b/arch/s390/kvm/gaccess.c +@@ -866,6 +866,20 @@ static int guest_range_to_gpas(struct kv + return 0; + } + ++static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, ++ void *data, unsigned int len) ++{ ++ const unsigned int offset = offset_in_page(gpa); ++ const gfn_t gfn = gpa_to_gfn(gpa); ++ int rc; ++ ++ if (mode == GACC_STORE) ++ rc = kvm_write_guest_page(kvm, gfn, data, offset, len); ++ else ++ rc = kvm_read_guest_page(kvm, gfn, data, offset, len); ++ return rc; ++} ++ + int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, + unsigned long len, enum gacc_mode mode) + { +@@ -896,10 +910,7 @@ int access_guest(struct kvm_vcpu *vcpu, + rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode); + for (idx = 0; idx < nr_pages && !rc; idx++) { + fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len); +- if (mode == GACC_STORE) +- rc = kvm_write_guest(vcpu->kvm, gpas[idx], data, fragment_len); +- else +- rc = kvm_read_guest(vcpu->kvm, gpas[idx], data, fragment_len); ++ rc = access_guest_page(vcpu->kvm, mode, gpas[idx], data, fragment_len); + len -= fragment_len; + data += fragment_len; + } +@@ -920,10 +931,7 @@ int access_guest_real(struct kvm_vcpu *v + while (len && !rc) { + gpa = kvm_s390_real_to_abs(vcpu, gra); + fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len); +- if (mode) +- rc = write_guest_abs(vcpu, gpa, data, fragment_len); +- else +- rc = read_guest_abs(vcpu, gpa, data, fragment_len); ++ rc = access_guest_page(vcpu->kvm, mode, gpa, data, fragment_len); + len -= fragment_len; + gra += fragment_len; + data += fragment_len; diff --git a/patches.suse/KVM-s390-gaccess-Refactor-access-address-range-check b/patches.suse/KVM-s390-gaccess-Refactor-access-address-range-check new file mode 100644 index 0000000..1ffa4da --- /dev/null +++ b/patches.suse/KVM-s390-gaccess-Refactor-access-address-range-check @@ -0,0 +1,225 @@ +From: Janis Schoetterl-Glausch +Date: Fri, 26 Nov 2021 17:45:48 +0100 +Subject: KVM: s390: gaccess: Refactor access address range check +Git-commit: 7faa543df19bf62d4583a64d3902705747f2ad29 +Patch-mainline: v5.17-rc1 +References: jsc#PED-579 + +Do not round down the first 
address to the page boundary, just translate +it normally, which gives the value we care about in the first place. +Given this, translating a single address is just the special case of +translating a range spanning a single page. + +Make the output optional, so the function can be used to just check a +range. + +Signed-off-by: Janis Schoetterl-Glausch +Reviewed-by: Janosch Frank +Reviewed-by: Claudio Imbrenda +Message-Id: <20211126164549.7046-3-scgl@linux.ibm.com> +Signed-off-by: Janosch Frank +Acked-by: Petr Tesarik +--- + arch/s390/kvm/gaccess.c | 122 +++++++++++++++++++++++++++--------------------- + 1 file changed, 69 insertions(+), 53 deletions(-) + +--- a/arch/s390/kvm/gaccess.c ++++ b/arch/s390/kvm/gaccess.c +@@ -794,35 +794,74 @@ static int low_address_protection_enable + return 1; + } + +-static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, +- unsigned long *pages, unsigned long nr_pages, +- const union asce asce, enum gacc_mode mode) ++/** ++ * guest_range_to_gpas() - Calculate guest physical addresses of page fragments ++ * covering a logical range ++ * @vcpu: virtual cpu ++ * @ga: guest address, start of range ++ * @ar: access register ++ * @gpas: output argument, may be NULL ++ * @len: length of range in bytes ++ * @asce: address-space-control element to use for translation ++ * @mode: access mode ++ * ++ * Translate a logical range to a series of guest absolute addresses, ++ * such that the concatenation of page fragments starting at each gpa make up ++ * the whole range. ++ * The translation is performed as if done by the cpu for the given @asce, @ar, ++ * @mode and state of the @vcpu. ++ * If the translation causes an exception, its program interruption code is ++ * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified ++ * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject ++ * a correct exception into the guest. ++ * The resulting gpas are stored into @gpas, unless it is NULL. ++ * ++ * Note: All fragments except the first one start at the beginning of a page. ++ * When deriving the boundaries of a fragment from a gpa, all but the last ++ * fragment end at the end of the page. ++ * ++ * Return: ++ * * 0 - success ++ * * <0 - translation could not be performed, for example if guest ++ * memory could not be accessed ++ * * >0 - an access exception occurred. In this case the returned value ++ * is the program interruption code and the contents of pgm may ++ * be used to inject an exception into the guest. 
++ */ ++static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, ++ unsigned long *gpas, unsigned long len, ++ const union asce asce, enum gacc_mode mode) + { + psw_t *psw = &vcpu->arch.sie_block->gpsw; ++ unsigned int offset = offset_in_page(ga); ++ unsigned int fragment_len; + int lap_enabled, rc = 0; + enum prot_type prot; ++ unsigned long gpa; + + lap_enabled = low_address_protection_enabled(vcpu, asce); +- while (nr_pages) { ++ while (min(PAGE_SIZE - offset, len) > 0) { ++ fragment_len = min(PAGE_SIZE - offset, len); + ga = kvm_s390_logical_to_effective(vcpu, ga); + if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) + return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode, + PROT_TYPE_LA); +- ga &= PAGE_MASK; + if (psw_bits(*psw).dat) { +- rc = guest_translate(vcpu, ga, pages, asce, mode, &prot); ++ rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot); + if (rc < 0) + return rc; + } else { +- *pages = kvm_s390_real_to_abs(vcpu, ga); +- if (kvm_is_error_gpa(vcpu->kvm, *pages)) ++ gpa = kvm_s390_real_to_abs(vcpu, ga); ++ if (kvm_is_error_gpa(vcpu->kvm, gpa)) + rc = PGM_ADDRESSING; + } + if (rc) + return trans_exc(vcpu, rc, ga, ar, mode, prot); +- ga += PAGE_SIZE; +- pages++; +- nr_pages--; ++ if (gpas) ++ *gpas++ = gpa; ++ offset = 0; ++ ga += fragment_len; ++ len -= fragment_len; + } + return 0; + } +@@ -831,10 +870,10 @@ int access_guest(struct kvm_vcpu *vcpu, + unsigned long len, enum gacc_mode mode) + { + psw_t *psw = &vcpu->arch.sie_block->gpsw; +- unsigned long nr_pages, gpa, idx; +- unsigned long pages_array[2]; ++ unsigned long nr_pages, idx; ++ unsigned long gpa_array[2]; + unsigned int fragment_len; +- unsigned long *pages; ++ unsigned long *gpas; + int need_ipte_lock; + union asce asce; + int rc; +@@ -846,30 +885,28 @@ int access_guest(struct kvm_vcpu *vcpu, + if (rc) + return rc; + nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; +- pages = pages_array; +- if (nr_pages > ARRAY_SIZE(pages_array)) +- pages = vmalloc(array_size(nr_pages, sizeof(unsigned long))); +- if (!pages) ++ gpas = gpa_array; ++ if (nr_pages > ARRAY_SIZE(gpa_array)) ++ gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long))); ++ if (!gpas) + return -ENOMEM; + need_ipte_lock = psw_bits(*psw).dat && !asce.r; + if (need_ipte_lock) + ipte_lock(vcpu); +- rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode); ++ rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode); + for (idx = 0; idx < nr_pages && !rc; idx++) { +- gpa = pages[idx] + offset_in_page(ga); +- fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len); ++ fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len); + if (mode == GACC_STORE) +- rc = kvm_write_guest(vcpu->kvm, gpa, data, fragment_len); ++ rc = kvm_write_guest(vcpu->kvm, gpas[idx], data, fragment_len); + else +- rc = kvm_read_guest(vcpu->kvm, gpa, data, fragment_len); ++ rc = kvm_read_guest(vcpu->kvm, gpas[idx], data, fragment_len); + len -= fragment_len; +- ga += fragment_len; + data += fragment_len; + } + if (need_ipte_lock) + ipte_unlock(vcpu); +- if (nr_pages > ARRAY_SIZE(pages_array)) +- vfree(pages); ++ if (nr_pages > ARRAY_SIZE(gpa_array)) ++ vfree(gpas); + return rc; + } + +@@ -911,8 +948,6 @@ int access_guest_real(struct kvm_vcpu *v + int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, + unsigned long *gpa, enum gacc_mode mode) + { +- psw_t *psw = &vcpu->arch.sie_block->gpsw; +- enum prot_type prot; + union asce asce; + int rc; + +@@ -920,23 +955,7 @@ int 
guest_translate_address(struct kvm_v + rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); + if (rc) + return rc; +- if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { +- if (mode == GACC_STORE) +- return trans_exc(vcpu, PGM_PROTECTION, gva, 0, +- mode, PROT_TYPE_LA); +- } +- +- if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */ +- rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot); +- if (rc > 0) +- return trans_exc(vcpu, rc, gva, 0, mode, prot); +- } else { +- *gpa = kvm_s390_real_to_abs(vcpu, gva); +- if (kvm_is_error_gpa(vcpu->kvm, *gpa)) +- return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0); +- } +- +- return rc; ++ return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode); + } + + /** +@@ -950,17 +969,14 @@ int guest_translate_address(struct kvm_v + int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, + unsigned long length, enum gacc_mode mode) + { +- unsigned long gpa; +- unsigned long currlen; ++ union asce asce; + int rc = 0; + ++ rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); ++ if (rc) ++ return rc; + ipte_lock(vcpu); +- while (length > 0 && !rc) { +- currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE)); +- rc = guest_translate_address(vcpu, gva, ar, &gpa, mode); +- gva += currlen; +- length -= currlen; +- } ++ rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode); + ipte_unlock(vcpu); + + return rc; diff --git a/patches.suse/KVM-s390-gaccess-Refactor-gpa-and-length-calculation b/patches.suse/KVM-s390-gaccess-Refactor-gpa-and-length-calculation new file mode 100644 index 0000000..1e919ec --- /dev/null +++ b/patches.suse/KVM-s390-gaccess-Refactor-gpa-and-length-calculation @@ -0,0 +1,85 @@ +From: Janis Schoetterl-Glausch +Date: Fri, 26 Nov 2021 17:45:47 +0100 +Subject: KVM: s390: gaccess: Refactor gpa and length calculation +Git-commit: 416e7f0c9d613bf84e182eba9547ae8f9f5bfa4c +Patch-mainline: v5.17-rc1 +References: jsc#PED-579 + +Improve readability by renaming the length variable and +not calculating the offset manually. 
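For illustration, the two idioms the patch standardizes on are equivalent to
the open-coded arithmetic they replace; a self-contained sketch (PAGE_SIZE
hard-coded to 4K here, the kernel's definitions being authoritative)::

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))
    /* offset_in_page(p) is exactly the manual mask it replaces */
    #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)

    /* fragment_len clamps a copy to the end of the current page or to
     * the remaining length, whichever is smaller */
    static unsigned int fragment_len(unsigned long gpa, unsigned long len)
    {
            unsigned long to_page_end = PAGE_SIZE - offset_in_page(gpa);

            return to_page_end < len ? to_page_end : len;
    }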
+ +Signed-off-by: Janis Schoetterl-Glausch +Reviewed-by: Janosch Frank +Reviewed-by: David Hildenbrand +Reviewed-by: Claudio Imbrenda +Message-Id: <20211126164549.7046-2-scgl@linux.ibm.com> +Signed-off-by: Janosch Frank +Acked-by: Petr Tesarik +--- + arch/s390/kvm/gaccess.c | 32 +++++++++++++++++--------------- + 1 file changed, 17 insertions(+), 15 deletions(-) + +--- a/arch/s390/kvm/gaccess.c ++++ b/arch/s390/kvm/gaccess.c +@@ -831,8 +831,9 @@ int access_guest(struct kvm_vcpu *vcpu, + unsigned long len, enum gacc_mode mode) + { + psw_t *psw = &vcpu->arch.sie_block->gpsw; +- unsigned long _len, nr_pages, gpa, idx; ++ unsigned long nr_pages, gpa, idx; + unsigned long pages_array[2]; ++ unsigned int fragment_len; + unsigned long *pages; + int need_ipte_lock; + union asce asce; +@@ -855,15 +856,15 @@ int access_guest(struct kvm_vcpu *vcpu, + ipte_lock(vcpu); + rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode); + for (idx = 0; idx < nr_pages && !rc; idx++) { +- gpa = *(pages + idx) + (ga & ~PAGE_MASK); +- _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); ++ gpa = pages[idx] + offset_in_page(ga); ++ fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len); + if (mode == GACC_STORE) +- rc = kvm_write_guest(vcpu->kvm, gpa, data, _len); ++ rc = kvm_write_guest(vcpu->kvm, gpa, data, fragment_len); + else +- rc = kvm_read_guest(vcpu->kvm, gpa, data, _len); +- len -= _len; +- ga += _len; +- data += _len; ++ rc = kvm_read_guest(vcpu->kvm, gpa, data, fragment_len); ++ len -= fragment_len; ++ ga += fragment_len; ++ data += fragment_len; + } + if (need_ipte_lock) + ipte_unlock(vcpu); +@@ -875,19 +876,20 @@ int access_guest(struct kvm_vcpu *vcpu, + int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, + void *data, unsigned long len, enum gacc_mode mode) + { +- unsigned long _len, gpa; ++ unsigned int fragment_len; ++ unsigned long gpa; + int rc = 0; + + while (len && !rc) { + gpa = kvm_s390_real_to_abs(vcpu, gra); +- _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); ++ fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len); + if (mode) +- rc = write_guest_abs(vcpu, gpa, data, _len); ++ rc = write_guest_abs(vcpu, gpa, data, fragment_len); + else +- rc = read_guest_abs(vcpu, gpa, data, _len); +- len -= _len; +- gra += _len; +- data += _len; ++ rc = read_guest_abs(vcpu, gpa, data, fragment_len); ++ len -= fragment_len; ++ gra += fragment_len; ++ data += fragment_len; + } + return rc; + } diff --git a/patches.suse/KVM-s390-handle_tprot-Honor-storage-keys b/patches.suse/KVM-s390-handle_tprot-Honor-storage-keys new file mode 100644 index 0000000..402c75d --- /dev/null +++ b/patches.suse/KVM-s390-handle_tprot-Honor-storage-keys @@ -0,0 +1,151 @@ +From: Janis Schoetterl-Glausch +Date: Fri, 11 Feb 2022 19:22:08 +0100 +Subject: KVM: s390: handle_tprot: Honor storage keys +Git-commit: 61380a7adfce1524b8cd16c0ce4f46abce587f95 +Patch-mainline: v5.18-rc1 +References: jsc#PED-579 + +Use the access key operand to check for key protection when +translating guest addresses. +Since the translation code checks for accessing exceptions/error hvas, +we can remove the check here and simplify the control flow. +Keep checking if the memory is read-only even if such memslots are +currently not supported. + +handle_tprot was the last user of guest_translate_address, +so remove it. 
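Distilled from the handle_tprot() hunk further below, the condition code
selection can be summarized as follows (a sketch, not the literal kernel
code; the PGM_* values are the s390 program interruption codes, with the
protection exception code 4 also being what the selftests in this series
assert on)::

    #define PGM_PROTECTION       0x04
    #define PGM_ADDRESSING       0x05
    #define PGM_TRANSLATION_SPEC 0x12

    /* ret is the result of the GACC_STORE translation, retried as
     * GACC_FETCH on a protection exception; -1 means "inject ret". */
    static int tprot_cc(int ret, int writable)
    {
            if (ret == 0 && writable)
                    return 0; /* fetching and storing permitted */
            if (ret == 0)
                    return 1; /* fetching permitted, storing not */
            if (ret == PGM_PROTECTION)
                    return 2; /* neither permitted (key protection) */
            if (ret != PGM_ADDRESSING && ret != PGM_TRANSLATION_SPEC)
                    return 3; /* translation not available */
            return -1;
    }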
+ +Signed-off-by: Janis Schoetterl-Glausch +Reviewed-by: Janosch Frank +Reviewed-by: Claudio Imbrenda +Link: https://lore.kernel.org/r/20220211182215.2730017-4-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +Acked-by: Petr Tesarik +--- + arch/s390/kvm/gaccess.c | 9 ------ + arch/s390/kvm/gaccess.h | 3 -- + arch/s390/kvm/priv.c | 66 +++++++++++++++++++++++++----------------------- + 3 files changed, 35 insertions(+), 43 deletions(-) + +--- a/arch/s390/kvm/gaccess.c ++++ b/arch/s390/kvm/gaccess.c +@@ -1118,15 +1118,6 @@ int guest_translate_address_with_key(str + access_key); + } + +-int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, +- unsigned long *gpa, enum gacc_mode mode) +-{ +- u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key; +- +- return guest_translate_address_with_key(vcpu, gva, ar, gpa, mode, +- access_key); +-} +- + /** + * check_gva_range - test a range of guest virtual addresses for accessibility + * @vcpu: virtual cpu +--- a/arch/s390/kvm/gaccess.h ++++ b/arch/s390/kvm/gaccess.h +@@ -190,9 +190,6 @@ int guest_translate_address_with_key(str + unsigned long *gpa, enum gacc_mode mode, + u8 access_key); + +-int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, +- u8 ar, unsigned long *gpa, enum gacc_mode mode); +- + int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, + unsigned long length, enum gacc_mode mode, u8 access_key); + +--- a/arch/s390/kvm/priv.c ++++ b/arch/s390/kvm/priv.c +@@ -1440,10 +1440,11 @@ int kvm_s390_handle_eb(struct kvm_vcpu * + + static int handle_tprot(struct kvm_vcpu *vcpu) + { +- u64 address1, address2; +- unsigned long hva, gpa; +- int ret = 0, cc = 0; ++ u64 address, operand2; ++ unsigned long gpa; ++ u8 access_key; + bool writable; ++ int ret, cc; + u8 ar; + + vcpu->stat.instruction_tprot++; +@@ -1451,43 +1452,46 @@ static int handle_tprot(struct kvm_vcpu + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + +- kvm_s390_get_base_disp_sse(vcpu, &address1, &address2, &ar, NULL); ++ kvm_s390_get_base_disp_sse(vcpu, &address, &operand2, &ar, NULL); ++ access_key = (operand2 & 0xf0) >> 4; + +- /* we only handle the Linux memory detection case: +- * access key == 0 +- * everything else goes to userspace. */ +- if (address2 & 0xf0) +- return -EOPNOTSUPP; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) + ipte_lock(vcpu); +- ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE); +- if (ret == PGM_PROTECTION) { ++ ++ ret = guest_translate_address_with_key(vcpu, address, ar, &gpa, ++ GACC_STORE, access_key); ++ if (ret == 0) { ++ gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable); ++ } else if (ret == PGM_PROTECTION) { ++ writable = false; + /* Write protected? Try again with read-only... 
*/ +- cc = 1; +- ret = guest_translate_address(vcpu, address1, ar, &gpa, +- GACC_FETCH); ++ ret = guest_translate_address_with_key(vcpu, address, ar, &gpa, ++ GACC_FETCH, access_key); + } +- if (ret) { +- if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) { +- ret = kvm_s390_inject_program_int(vcpu, ret); +- } else if (ret > 0) { +- /* Translation not available */ +- kvm_s390_set_psw_cc(vcpu, 3); ++ if (ret >= 0) { ++ cc = -1; ++ ++ /* Fetching permitted; storing permitted */ ++ if (ret == 0 && writable) ++ cc = 0; ++ /* Fetching permitted; storing not permitted */ ++ else if (ret == 0 && !writable) ++ cc = 1; ++ /* Fetching not permitted; storing not permitted */ ++ else if (ret == PGM_PROTECTION) ++ cc = 2; ++ /* Translation not available */ ++ else if (ret != PGM_ADDRESSING && ret != PGM_TRANSLATION_SPEC) ++ cc = 3; ++ ++ if (cc != -1) { ++ kvm_s390_set_psw_cc(vcpu, cc); + ret = 0; ++ } else { ++ ret = kvm_s390_inject_program_int(vcpu, ret); + } +- goto out_unlock; + } + +- hva = gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable); +- if (kvm_is_error_hva(hva)) { +- ret = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); +- } else { +- if (!writable) +- cc = 1; /* Write not permitted ==> read-only */ +- kvm_s390_set_psw_cc(vcpu, cc); +- /* Note: CC2 only occurs for storage keys (not supported yet) */ +- } +-out_unlock: + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) + ipte_unlock(vcpu); + return ret; diff --git a/patches.suse/KVM-s390-selftest-Test-suppression-indication-on-key-prot-exception b/patches.suse/KVM-s390-selftest-Test-suppression-indication-on-key-prot-exception new file mode 100644 index 0000000..d428b6f --- /dev/null +++ b/patches.suse/KVM-s390-selftest-Test-suppression-indication-on-key-prot-exception @@ -0,0 +1,107 @@ +From: Janis Schoetterl-Glausch +Date: Thu, 12 May 2022 15:10:18 +0200 +Subject: KVM: s390: selftest: Test suppression indication on key prot + exception +Git-commit: c71159648c3cf0f7127ddc0bdf3eb4d7885210df +Patch-mainline: v5.19-rc1 +References: jsc#PED-579 + +Check that suppression is not indicated on injection of a key checked +protection exception caused by a memop after it already modified guest +memory, as that violates the definition of suppression. + +Signed-off-by: Janis Schoetterl-Glausch +Reviewed-by: Christian Borntraeger +Link: https://lore.kernel.org/r/20220512131019.2594948-3-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +Signed-off-by: Janosch Frank +Acked-by: Petr Tesarik +--- + tools/testing/selftests/kvm/s390x/memop.c | 46 +++++++++++++++++++++++++++++- + 1 file changed, 45 insertions(+), 1 deletion(-) + +--- a/tools/testing/selftests/kvm/s390x/memop.c ++++ b/tools/testing/selftests/kvm/s390x/memop.c +@@ -10,6 +10,8 @@ + #include + #include + ++#include ++ + #include "test_util.h" + #include "kvm_util.h" + +@@ -194,6 +196,7 @@ static int err_memop_ioctl(struct test_v + #define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o) + #define AR(a) ._ar = 1, .ar = (a) + #define KEY(a) .f_key = 1, .key = (a) ++#define INJECT .f_inject = 1 + + #define CHECK_N_DO(f, ...) 
({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); }) + +@@ -430,9 +433,18 @@ static void test_copy_key_fetch_prot(voi + TEST_ASSERT(rv == 4, "Should result in protection exception"); \ + }) + ++static void guest_error_key(void) ++{ ++ GUEST_SYNC(STAGE_INITED); ++ set_storage_key_range(mem1, PAGE_SIZE, 0x18); ++ set_storage_key_range(mem1 + PAGE_SIZE, sizeof(mem1) - PAGE_SIZE, 0x98); ++ GUEST_SYNC(STAGE_SKEYS_SET); ++ GUEST_SYNC(STAGE_IDLED); ++} ++ + static void test_errors_key(void) + { +- struct test_default t = test_default_init(guest_copy_key_fetch_prot); ++ struct test_default t = test_default_init(guest_error_key); + + HOST_SYNC(t.vcpu, STAGE_INITED); + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); +@@ -446,6 +458,37 @@ static void test_errors_key(void) + kvm_vm_free(t.kvm_vm); + } + ++static void test_termination(void) ++{ ++ struct test_default t = test_default_init(guest_error_key); ++ uint64_t prefix; ++ uint64_t teid; ++ uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61); ++ uint64_t psw[2]; ++ ++ HOST_SYNC(t.vcpu, STAGE_INITED); ++ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); ++ ++ /* vcpu, mismatching keys after first page */ ++ ERR_PROT_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(1), INJECT); ++ /* ++ * The memop injected a program exception and the test needs to check the ++ * Translation-Exception Identification (TEID). It is necessary to run ++ * the guest in order to be able to read the TEID from guest memory. ++ * Set the guest program new PSW, so the guest state is not clobbered. ++ */ ++ prefix = t.run->s.regs.prefix; ++ psw[0] = t.run->psw_mask; ++ psw[1] = t.run->psw_addr; ++ MOP(t.vm, ABSOLUTE, WRITE, psw, sizeof(psw), GADDR(prefix + 464)); ++ HOST_SYNC(t.vcpu, STAGE_IDLED); ++ MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168)); ++ /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */ ++ ASSERT_EQ(teid & teid_mask, 0); ++ ++ kvm_vm_free(t.kvm_vm); ++} ++ + static void test_errors_key_storage_prot_override(void) + { + struct test_default t = test_default_init(guest_copy_key_fetch_prot); +@@ -668,6 +711,7 @@ int main(int argc, char *argv[]) + test_copy_key_fetch_prot(); + test_copy_key_fetch_prot_override(); + test_errors_key(); ++ test_termination(); + test_errors_key_storage_prot_override(); + test_errors_key_fetch_prot_override_not_enabled(); + test_errors_key_fetch_prot_override_enabled(); diff --git a/patches.suse/KVM-s390-selftests-Add-error-memop-tests b/patches.suse/KVM-s390-selftests-Add-error-memop-tests new file mode 100644 index 0000000..ff684b7 --- /dev/null +++ b/patches.suse/KVM-s390-selftests-Add-error-memop-tests @@ -0,0 +1,207 @@ +From: Janis Schoetterl-Glausch +Date: Tue, 8 Mar 2022 13:58:41 +0100 +Subject: KVM: s390: selftests: Add error memop tests +Git-commit: 3bcc372c9865bec3ab9bfcf30b2426cf68bc18af +Patch-mainline: v5.18-rc1 +References: jsc#PED-579 + +Test that errors occur if key protection disallows access, including +tests for storage and fetch protection override. Perform tests for both +logical vcpu and absolute vm ioctls. +Also extend the existing tests to the vm ioctl. 
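The storage keys these selftests install (such as 0x18 and 0x98) and the
access keys they pass (2, 9, 17) follow the s390 storage key layout that the
kernel side matches via FIELD_GET(_PAGE_ACC_BITS, ...) and _PAGE_FP_BIT; a
decoding sketch, assuming the usual ACC-in-top-nibble, fetch-protection-in-
bit-4 layout::

    #include <stdint.h>
    #include <stdio.h>

    static void decode_skey(uint8_t skey)
    {
            uint8_t acc = skey >> 4;   /* access-control bits */
            int fp = (skey >> 3) & 1;  /* fetch-protection bit */

            printf("ACC=%u F=%d%s\n", acc, fp,
                   acc == 9 ? " (storage protection override key)" : "");
    }

    /* decode_skey(0x18) -> ACC=1 F=1
     * decode_skey(0x98) -> ACC=9 F=1 (storage protection override key) */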
+ +Signed-off-by: Janis Schoetterl-Glausch +Link: https://lore.kernel.org/r/20220308125841.3271721-6-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +Acked-by: Petr Tesarik +--- + tools/testing/selftests/kvm/s390x/memop.c | 137 +++++++++++++++++++++++++++--- + 1 file changed, 124 insertions(+), 13 deletions(-) + +--- a/tools/testing/selftests/kvm/s390x/memop.c ++++ b/tools/testing/selftests/kvm/s390x/memop.c +@@ -422,6 +422,46 @@ static void test_copy_key_fetch_prot(voi + kvm_vm_free(t.kvm_vm); + } + ++#define ERR_PROT_MOP(...) \ ++({ \ ++ int rv; \ ++ \ ++ rv = ERR_MOP(__VA_ARGS__); \ ++ TEST_ASSERT(rv == 4, "Should result in protection exception"); \ ++}) ++ ++static void test_errors_key(void) ++{ ++ struct test_default t = test_default_init(guest_copy_key_fetch_prot); ++ ++ HOST_SYNC(t.vcpu, STAGE_INITED); ++ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); ++ ++ /* vm/vcpu, mismatching keys, fetch protection in effect */ ++ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); ++ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem2), KEY(2)); ++ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); ++ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2)); ++ ++ kvm_vm_free(t.kvm_vm); ++} ++ ++static void test_errors_key_storage_prot_override(void) ++{ ++ struct test_default t = test_default_init(guest_copy_key_fetch_prot); ++ ++ HOST_SYNC(t.vcpu, STAGE_INITED); ++ t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE; ++ t.run->kvm_dirty_regs = KVM_SYNC_CRS; ++ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); ++ ++ /* vm, mismatching keys, storage protection override not applicable to vm */ ++ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); ++ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2)); ++ ++ kvm_vm_free(t.kvm_vm); ++} ++ + const uint64_t last_page_addr = -PAGE_SIZE; + + static void guest_copy_key_fetch_prot_override(void) +@@ -481,6 +521,58 @@ out: + kvm_vm_free(t.kvm_vm); + } + ++static void test_errors_key_fetch_prot_override_not_enabled(void) ++{ ++ struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); ++ vm_vaddr_t guest_0_page, guest_last_page; ++ ++ guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); ++ guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); ++ if (guest_0_page != 0 || guest_last_page != last_page_addr) { ++ print_skip("did not allocate guest pages at required positions"); ++ goto out; ++ } ++ HOST_SYNC(t.vcpu, STAGE_INITED); ++ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); ++ ++ /* vcpu, mismatching keys on fetch, fetch protection override not enabled */ ++ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(0), KEY(2)); ++ ++out: ++ kvm_vm_free(t.kvm_vm); ++} ++ ++static void test_errors_key_fetch_prot_override_enabled(void) ++{ ++ struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); ++ vm_vaddr_t guest_0_page, guest_last_page; ++ ++ guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); ++ guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); ++ if (guest_0_page != 0 || guest_last_page != last_page_addr) { ++ print_skip("did not allocate guest pages at required positions"); ++ goto out; ++ } ++ HOST_SYNC(t.vcpu, STAGE_INITED); ++ t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE; ++ t.run->kvm_dirty_regs = KVM_SYNC_CRS; ++ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); ++ ++ /* ++ * vcpu, 
mismatching keys on fetch,
++ * fetch protection override does not apply because memory range exceeded
++ */
++ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048 + 1, GADDR_V(0), KEY(2));
++ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048 + 1,
++ GADDR_V(guest_last_page), KEY(2));
++ /* vm, fetch protection override does not apply */
++ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR(0), KEY(2));
++ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
++
++out:
++ kvm_vm_free(t.kvm_vm);
++}
++
+ static void guest_idle(void)
+ {
+ GUEST_SYNC(STAGE_INITED); /* for consistency's sake */
+@@ -488,39 +580,54 @@ static void guest_idle(void)
+ GUEST_SYNC(STAGE_IDLED);
+ }
+
+-static void test_errors(void)
++static void _test_errors_common(struct test_vcpu vcpu, enum mop_target target, int size)
+ {
+- struct test_default t = test_default_init(guest_idle);
+ int rv;
+
+- HOST_SYNC(t.vcpu, STAGE_INITED);
+-
+ /* Bad size: */
+- rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, -1, GADDR_V(mem1));
++ rv = ERR_MOP(vcpu, target, WRITE, mem1, -1, GADDR_V(mem1));
+ TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
+
+ /* Zero size: */
+- rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, 0, GADDR_V(mem1));
++ rv = ERR_MOP(vcpu, target, WRITE, mem1, 0, GADDR_V(mem1));
+ TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
+ "ioctl allows 0 as size");
+
+ /* Bad flags: */
+- rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), SET_FLAGS(-1));
++ rv = ERR_MOP(vcpu, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1));
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
+
+- /* Bad operation: */
+- rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1));
+- TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
+-
+ /* Bad guest address: */
+- rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR((void *)~0xfffUL), CHECK_ONLY);
++ rv = ERR_MOP(vcpu, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY);
+ TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access");
+
+ /* Bad host address: */
+- rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, 0, t.size, GADDR_V(mem1));
++ rv = ERR_MOP(vcpu, target, WRITE, 0, size, GADDR_V(mem1));
+ TEST_ASSERT(rv == -1 && errno == EFAULT,
+ "ioctl does not report bad host memory address");
+
++ /* Bad key: */
++ rv = ERR_MOP(vcpu, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17));
++ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows invalid key");
++}
++
++static void test_errors(void)
++{
++ struct test_default t = test_default_init(guest_idle);
++ int rv;
++
++ HOST_SYNC(t.vcpu, STAGE_INITED);
++
++ _test_errors_common(t.vcpu, LOGICAL, t.size);
++ _test_errors_common(t.vm, ABSOLUTE, t.size);
++
++ /* Bad operation: */
++ rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1));
++ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
++ /* virtual addresses are not translated when passing INVALID */
++ rv = ERR_MOP(t.vm, INVALID, WRITE, mem1, PAGE_SIZE, GADDR(0));
++ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
++
+ /* Bad access register: */
+ t.run->psw_mask &= ~(3UL << (63 - 17));
+ t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */
+@@ -560,6 +667,10 @@ int main(int argc, char *argv[])
+ test_copy_key_storage_prot_override();
+ test_copy_key_fetch_prot();
+ test_copy_key_fetch_prot_override();
++ test_errors_key();
++ test_errors_key_storage_prot_override();
++ test_errors_key_fetch_prot_override_not_enabled();
++ test_errors_key_fetch_prot_override_enabled();
+ } else {
+ print_skip("storage key memop extension not supported");
+ }
diff --git a/patches.suse/KVM-s390-selftests-Add-macro-as-abstraction-for-MEM_OP b/patches.suse/KVM-s390-selftests-Add-macro-as-abstraction-for-MEM_OP
new file mode 100644
index 0000000..c24a6bc
--- /dev/null
+++ b/patches.suse/KVM-s390-selftests-Add-macro-as-abstraction-for-MEM_OP
@@ -0,0 +1,378 @@
+From: Janis Schoetterl-Glausch
+Date: Tue, 8 Mar 2022 13:58:38 +0100
+Subject: KVM: s390: selftests: Add macro as abstraction for MEM_OP
+Git-commit: 4eb562ab99c427bcfb94d39bf54a44919ccbb64c
+Patch-mainline: v5.18-rc1
+References: jsc#PED-579
+
+In order to achieve good test coverage we need to be able to invoke the
+MEM_OP ioctl with all possible parametrizations.
+However, for a given test, we want to be concise and not specify a long
+list of default values for parameters not relevant for the test, so the
+reader's attention is not needlessly diverted.
+Add a macro that enables this and convert the existing test to use it.
+The macro emulates named arguments and hides some of the ioctl's
+redundancy, e.g. sets the key flag if an access key is specified.
+
+Signed-off-by: Janis Schoetterl-Glausch
+Link: https://lore.kernel.org/r/20220308125841.3271721-3-scgl@linux.ibm.com
+Signed-off-by: Christian Borntraeger
+Acked-by: Petr Tesarik
+---
+ tools/testing/selftests/kvm/s390x/memop.c | 272 +++++++++++++++++++++---------
+ 1 file changed, 197 insertions(+), 75 deletions(-)
+
+--- a/tools/testing/selftests/kvm/s390x/memop.c
++++ b/tools/testing/selftests/kvm/s390x/memop.c
+@@ -13,6 +13,188 @@
+ #include "test_util.h"
+ #include "kvm_util.h"
+
++enum mop_target {
++ LOGICAL,
++ SIDA,
++ ABSOLUTE,
++ INVALID,
++};
++
++enum mop_access_mode {
++ READ,
++ WRITE,
++};
++
++struct mop_desc {
++ uintptr_t gaddr;
++ uintptr_t gaddr_v;
++ uint64_t set_flags;
++ unsigned int f_check : 1;
++ unsigned int f_inject : 1;
++ unsigned int f_key : 1;
++ unsigned int _gaddr_v : 1;
++ unsigned int _set_flags : 1;
++ unsigned int _sida_offset : 1;
++ unsigned int _ar : 1;
++ uint32_t size;
++ enum mop_target target;
++ enum mop_access_mode mode;
++ void *buf;
++ uint32_t sida_offset;
++ uint8_t ar;
++ uint8_t key;
++};
++
++static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc desc)
++{
++ struct kvm_s390_mem_op ksmo = {
++ .gaddr = (uintptr_t)desc.gaddr,
++ .size = desc.size,
++ .buf = ((uintptr_t)desc.buf),
++ .reserved = "ignored_ignored_ignored_ignored"
++ };
++
++ switch (desc.target) {
++ case LOGICAL:
++ if (desc.mode == READ)
++ ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
++ if (desc.mode == WRITE)
++ ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
++ break;
++ case SIDA:
++ if (desc.mode == READ)
++ ksmo.op = KVM_S390_MEMOP_SIDA_READ;
++ if (desc.mode == WRITE)
++ ksmo.op = KVM_S390_MEMOP_SIDA_WRITE;
++ break;
++ case ABSOLUTE:
++ if (desc.mode == READ)
++ ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ;
++ if (desc.mode == WRITE)
++ ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE;
++ break;
++ case INVALID:
++ ksmo.op = -1;
++ }
++ if (desc.f_check)
++ ksmo.flags |= KVM_S390_MEMOP_F_CHECK_ONLY;
++ if (desc.f_inject)
++ ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION;
++ if (desc._set_flags)
++ ksmo.flags = desc.set_flags;
++ if (desc.f_key) {
++ ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION;
++ ksmo.key = desc.key;
++ }
++ if (desc._ar)
++ ksmo.ar = desc.ar;
++ else
++ ksmo.ar = 0;
++ if (desc._sida_offset)
++
ksmo.sida_offset = desc.sida_offset; ++ ++ return ksmo; ++} ++ ++/* vcpu dummy id signifying that vm instead of vcpu ioctl is to occur */ ++const uint32_t VM_VCPU_ID = (uint32_t)-1; ++ ++struct test_vcpu { ++ struct kvm_vm *vm; ++ uint32_t id; ++}; ++ ++#define PRINT_MEMOP false ++static void print_memop(uint32_t vcpu_id, const struct kvm_s390_mem_op *ksmo) ++{ ++ if (!PRINT_MEMOP) ++ return; ++ ++ if (vcpu_id == VM_VCPU_ID) ++ printf("vm memop("); ++ else ++ printf("vcpu memop("); ++ switch (ksmo->op) { ++ case KVM_S390_MEMOP_LOGICAL_READ: ++ printf("LOGICAL, READ, "); ++ break; ++ case KVM_S390_MEMOP_LOGICAL_WRITE: ++ printf("LOGICAL, WRITE, "); ++ break; ++ case KVM_S390_MEMOP_SIDA_READ: ++ printf("SIDA, READ, "); ++ break; ++ case KVM_S390_MEMOP_SIDA_WRITE: ++ printf("SIDA, WRITE, "); ++ break; ++ case KVM_S390_MEMOP_ABSOLUTE_READ: ++ printf("ABSOLUTE, READ, "); ++ break; ++ case KVM_S390_MEMOP_ABSOLUTE_WRITE: ++ printf("ABSOLUTE, WRITE, "); ++ break; ++ } ++ printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u", ++ ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key); ++ if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY) ++ printf(", CHECK_ONLY"); ++ if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) ++ printf(", INJECT_EXCEPTION"); ++ if (ksmo->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) ++ printf(", SKEY_PROTECTION"); ++ puts(")"); ++} ++ ++static void memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo) ++{ ++ if (vcpu.id == VM_VCPU_ID) ++ vm_ioctl(vcpu.vm, KVM_S390_MEM_OP, ksmo); ++ else ++ vcpu_ioctl(vcpu.vm, vcpu.id, KVM_S390_MEM_OP, ksmo); ++} ++ ++static int err_memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo) ++{ ++ if (vcpu.id == VM_VCPU_ID) ++ return _vm_ioctl(vcpu.vm, KVM_S390_MEM_OP, ksmo); ++ else ++ return _vcpu_ioctl(vcpu.vm, vcpu.id, KVM_S390_MEM_OP, ksmo); ++} ++ ++#define MEMOP(err, vcpu_p, mop_target_p, access_mode_p, buf_p, size_p, ...) \ ++({ \ ++ struct test_vcpu __vcpu = (vcpu_p); \ ++ struct mop_desc __desc = { \ ++ .target = (mop_target_p), \ ++ .mode = (access_mode_p), \ ++ .buf = (buf_p), \ ++ .size = (size_p), \ ++ __VA_ARGS__ \ ++ }; \ ++ struct kvm_s390_mem_op __ksmo; \ ++ \ ++ if (__desc._gaddr_v) { \ ++ if (__desc.target == ABSOLUTE) \ ++ __desc.gaddr = addr_gva2gpa(__vcpu.vm, __desc.gaddr_v); \ ++ else \ ++ __desc.gaddr = __desc.gaddr_v; \ ++ } \ ++ __ksmo = ksmo_from_desc(__desc); \ ++ print_memop(__vcpu.id, &__ksmo); \ ++ err##memop_ioctl(__vcpu, &__ksmo); \ ++}) ++ ++#define MOP(...) MEMOP(, __VA_ARGS__) ++#define ERR_MOP(...) 
MEMOP(err_, __VA_ARGS__) ++ ++#define GADDR(a) .gaddr = ((uintptr_t)a) ++#define GADDR_V(v) ._gaddr_v = 1, .gaddr_v = ((uintptr_t)v) ++#define CHECK_ONLY .f_check = 1 ++#define SET_FLAGS(f) ._set_flags = 1, .set_flags = (f) ++#define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o) ++#define AR(a) ._ar = 1, .ar = (a) ++#define KEY(a) .f_key = 1, .key = (a) ++ + #define VCPU_ID 1 + + static uint8_t mem1[65536]; +@@ -20,6 +202,7 @@ static uint8_t mem2[65536]; + + struct test_default { + struct kvm_vm *kvm_vm; ++ struct test_vcpu vcpu; + struct kvm_run *run; + int size; + }; +@@ -30,6 +213,7 @@ static struct test_default test_default_ + + t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1)); + t.kvm_vm = vm_create_default(VCPU_ID, 0, guest_code); ++ t.vcpu = (struct test_vcpu) { t.kvm_vm, VCPU_ID }; + t.run = vcpu_state(t.kvm_vm, VCPU_ID); + return t; + } +@@ -43,20 +227,14 @@ static void guest_copy(void) + static void test_copy(void) + { + struct test_default t = test_default_init(guest_copy); +- struct kvm_s390_mem_op ksmo; + int i; + + for (i = 0; i < sizeof(mem1); i++) + mem1[i] = i * i + i; + + /* Set the first array */ +- ksmo.gaddr = addr_gva2gpa(t.kvm_vm, (uintptr_t)mem1); +- ksmo.flags = 0; +- ksmo.size = t.size; +- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; +- ksmo.buf = (uintptr_t)mem1; +- ksmo.ar = 0; +- vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, ++ GADDR(addr_gva2gpa(t.kvm_vm, (uintptr_t)mem1))); + + /* Let the guest code copy the first array to the second */ + vcpu_run(t.kvm_vm, VCPU_ID); +@@ -68,13 +246,7 @@ static void test_copy(void) + memset(mem2, 0xaa, sizeof(mem2)); + + /* Get the second array */ +- ksmo.gaddr = (uintptr_t)mem2; +- ksmo.flags = 0; +- ksmo.size = t.size; +- ksmo.op = KVM_S390_MEMOP_LOGICAL_READ; +- ksmo.buf = (uintptr_t)mem2; +- ksmo.ar = 0; +- vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ MOP(t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem2)); + + TEST_ASSERT(!memcmp(mem1, mem2, t.size), + "Memory contents do not match!"); +@@ -91,68 +263,31 @@ static void guest_idle(void) + static void test_errors(void) + { + struct test_default t = test_default_init(guest_idle); +- struct kvm_s390_mem_op ksmo; + int rv; + +- /* Check error conditions - first bad size: */ +- ksmo.gaddr = (uintptr_t)mem1; +- ksmo.flags = 0; +- ksmo.size = -1; +- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; +- ksmo.buf = (uintptr_t)mem1; +- ksmo.ar = 0; +- rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ /* Bad size: */ ++ rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, -1, GADDR_V(mem1)); + TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes"); + + /* Zero size: */ +- ksmo.gaddr = (uintptr_t)mem1; +- ksmo.flags = 0; +- ksmo.size = 0; +- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; +- ksmo.buf = (uintptr_t)mem1; +- ksmo.ar = 0; +- rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, 0, GADDR_V(mem1)); + TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM), + "ioctl allows 0 as size"); + + /* Bad flags: */ +- ksmo.gaddr = (uintptr_t)mem1; +- ksmo.flags = -1; +- ksmo.size = t.size; +- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; +- ksmo.buf = (uintptr_t)mem1; +- ksmo.ar = 0; +- rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), SET_FLAGS(-1)); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags"); + + /* Bad operation: */ +- ksmo.gaddr = 
(uintptr_t)mem1; +- ksmo.flags = 0; +- ksmo.size = t.size; +- ksmo.op = -1; +- ksmo.buf = (uintptr_t)mem1; +- ksmo.ar = 0; +- rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1)); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations"); + + /* Bad guest address: */ +- ksmo.gaddr = ~0xfffUL; +- ksmo.flags = KVM_S390_MEMOP_F_CHECK_ONLY; +- ksmo.size = t.size; +- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; +- ksmo.buf = (uintptr_t)mem1; +- ksmo.ar = 0; +- rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR((void *)~0xfffUL), CHECK_ONLY); + TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access"); + + /* Bad host address: */ +- ksmo.gaddr = (uintptr_t)mem1; +- ksmo.flags = 0; +- ksmo.size = t.size; +- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; +- ksmo.buf = 0; +- ksmo.ar = 0; +- rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, 0, t.size, GADDR_V(mem1)); + TEST_ASSERT(rv == -1 && errno == EFAULT, + "ioctl does not report bad host memory address"); + +@@ -160,29 +295,16 @@ static void test_errors(void) + t.run->psw_mask &= ~(3UL << (63 - 17)); + t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */ + vcpu_run(t.kvm_vm, VCPU_ID); /* To sync new state to SIE block */ +- ksmo.gaddr = (uintptr_t)mem1; +- ksmo.flags = 0; +- ksmo.size = t.size; +- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; +- ksmo.buf = (uintptr_t)mem1; +- ksmo.ar = 17; +- rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), AR(17)); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15"); + t.run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */ + vcpu_run(t.kvm_vm, VCPU_ID); /* Run to sync new state */ + + /* Check that the SIDA calls are rejected for non-protected guests */ +- ksmo.gaddr = 0; +- ksmo.flags = 0; +- ksmo.size = 8; +- ksmo.op = KVM_S390_MEMOP_SIDA_READ; +- ksmo.buf = (uintptr_t)mem1; +- ksmo.sida_offset = 0x1c0; +- rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = ERR_MOP(t.vcpu, SIDA, READ, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0)); + TEST_ASSERT(rv == -1 && errno == EINVAL, + "ioctl does not reject SIDA_READ in non-protected mode"); +- ksmo.op = KVM_S390_MEMOP_SIDA_WRITE; +- rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = ERR_MOP(t.vcpu, SIDA, WRITE, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0)); + TEST_ASSERT(rv == -1 && errno == EINVAL, + "ioctl does not reject SIDA_WRITE in non-protected mode"); + diff --git a/patches.suse/KVM-s390-selftests-Add-more-copy-memop-tests b/patches.suse/KVM-s390-selftests-Add-more-copy-memop-tests new file mode 100644 index 0000000..f53ce9e --- /dev/null +++ b/patches.suse/KVM-s390-selftests-Add-more-copy-memop-tests @@ -0,0 +1,334 @@ +From: Janis Schoetterl-Glausch +Date: Tue, 8 Mar 2022 13:58:40 +0100 +Subject: KVM: s390: selftests: Add more copy memop tests +Git-commit: 1bb873495a9ebbc8f6dd54a110b2c639e225c782 +Patch-mainline: v5.18-rc1 +References: jsc#PED-579 + +Do not just test the actual copy, but also that success is indicated +when using the check only flag. +Add copy test with storage key checking enabled, including tests for +storage and fetch protection override. +These tests cover both logical vcpu ioctls as well as absolute vm ioctls.
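The check-then-do idea reads, as a minimal hedged sketch in plain C (stand-in names only; the real tests drive the MEM_OP ioctl with its CHECK_ONLY flag): perform each access twice, first in check-only mode to confirm the permission check alone succeeds, then for real, and verify the data actually moved.

#include <assert.h>
#include <string.h>

enum { OP_CHECK_ONLY = 1 << 0 };	/* stand-in for KVM_S390_MEMOP_F_CHECK_ONLY */

/* Stand-in for the real memop: in check-only mode nothing is copied. */
static int do_op(char *dst, const char *src, unsigned int len, int flags)
{
	if (!(flags & OP_CHECK_ONLY))
		memcpy(dst, src, len);
	return 0;	/* 0: the check (or the access) succeeded */
}

/* Run the operation once with the check flag, then for real. */
static void check_n_do(char *dst, const char *src, unsigned int len)
{
	assert(do_op(dst, src, len, OP_CHECK_ONLY) == 0);
	assert(do_op(dst, src, len, 0) == 0);
}

int main(void)
{
	char a[8] = "abcdefg", b[8] = { 0 };

	check_n_do(b, a, sizeof(a));
	assert(memcmp(a, b, sizeof(a)) == 0);
	return 0;
}

The CHECK_N_DO() macro in the hunks below plays this role for the selftest.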
+ +Signed-off-by: Janis Schoetterl-Glausch +Link: https://lore.kernel.org/r/20220308125841.3271721-5-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +Acked-by: Petr Tesarik +--- + tools/testing/selftests/kvm/s390x/memop.c | 243 ++++++++++++++++++++++++++++-- + 1 file changed, 230 insertions(+), 13 deletions(-) + +--- a/tools/testing/selftests/kvm/s390x/memop.c ++++ b/tools/testing/selftests/kvm/s390x/memop.c +@@ -195,13 +195,21 @@ static int err_memop_ioctl(struct test_v + #define AR(a) ._ar = 1, .ar = (a) + #define KEY(a) .f_key = 1, .key = (a) + ++#define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); }) ++ + #define VCPU_ID 1 ++#define PAGE_SHIFT 12 ++#define PAGE_SIZE (1ULL << PAGE_SHIFT) ++#define PAGE_MASK (~(PAGE_SIZE - 1)) ++#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) ++#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) + + static uint8_t mem1[65536]; + static uint8_t mem2[65536]; + + struct test_default { + struct kvm_vm *kvm_vm; ++ struct test_vcpu vm; + struct test_vcpu vcpu; + struct kvm_run *run; + int size; +@@ -213,6 +221,7 @@ static struct test_default test_default_ + + t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1)); + t.kvm_vm = vm_create_default(VCPU_ID, 0, guest_code); ++ t.vm = (struct test_vcpu) { t.kvm_vm, VM_VCPU_ID }; + t.vcpu = (struct test_vcpu) { t.kvm_vm, VCPU_ID }; + t.run = vcpu_state(t.kvm_vm, VCPU_ID); + return t; +@@ -223,6 +232,8 @@ enum stage { + STAGE_INITED, + /* Guest did nothing */ + STAGE_IDLED, ++ /* Guest set storage keys (specifics up to test case) */ ++ STAGE_SKEYS_SET, + /* Guest copied memory (locations up to test case) */ + STAGE_COPIED, + }; +@@ -239,6 +250,47 @@ enum stage { + ASSERT_EQ(uc.args[1], __stage); \ + }) \ + ++static void prepare_mem12(void) ++{ ++ int i; ++ ++ for (i = 0; i < sizeof(mem1); i++) ++ mem1[i] = rand(); ++ memset(mem2, 0xaa, sizeof(mem2)); ++} ++ ++#define ASSERT_MEM_EQ(p1, p2, size) \ ++ TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!") ++ ++#define DEFAULT_WRITE_READ(copy_cpu, mop_cpu, mop_target_p, size, ...) \ ++({ \ ++ struct test_vcpu __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \ ++ enum mop_target __target = (mop_target_p); \ ++ uint32_t __size = (size); \ ++ \ ++ prepare_mem12(); \ ++ CHECK_N_DO(MOP, __mop_cpu, __target, WRITE, mem1, __size, \ ++ GADDR_V(mem1), ##__VA_ARGS__); \ ++ HOST_SYNC(__copy_cpu, STAGE_COPIED); \ ++ CHECK_N_DO(MOP, __mop_cpu, __target, READ, mem2, __size, \ ++ GADDR_V(mem2), ##__VA_ARGS__); \ ++ ASSERT_MEM_EQ(mem1, mem2, __size); \ ++}) ++ ++#define DEFAULT_READ(copy_cpu, mop_cpu, mop_target_p, size, ...) 
\ ++({ \ ++ struct test_vcpu __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \ ++ enum mop_target __target = (mop_target_p); \ ++ uint32_t __size = (size); \ ++ \ ++ prepare_mem12(); \ ++ CHECK_N_DO(MOP, __mop_cpu, __target, WRITE, mem1, __size, \ ++ GADDR_V(mem1)); \ ++ HOST_SYNC(__copy_cpu, STAGE_COPIED); \ ++ CHECK_N_DO(MOP, __mop_cpu, __target, READ, mem2, __size, ##__VA_ARGS__);\ ++ ASSERT_MEM_EQ(mem1, mem2, __size); \ ++}) ++ + static void guest_copy(void) + { + GUEST_SYNC(STAGE_INITED); +@@ -249,27 +301,183 @@ static void guest_copy(void) + static void test_copy(void) + { + struct test_default t = test_default_init(guest_copy); +- int i; + +- for (i = 0; i < sizeof(mem1); i++) +- mem1[i] = i * i + i; ++ HOST_SYNC(t.vcpu, STAGE_INITED); ++ ++ DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size); ++ ++ kvm_vm_free(t.kvm_vm); ++} ++ ++static void set_storage_key_range(void *addr, size_t len, uint8_t key) ++{ ++ uintptr_t _addr, abs, i; ++ int not_mapped = 0; ++ ++ _addr = (uintptr_t)addr; ++ for (i = _addr & PAGE_MASK; i < _addr + len; i += PAGE_SIZE) { ++ abs = i; ++ asm volatile ( ++ "lra %[abs], 0(0,%[abs])\n" ++ " jz 0f\n" ++ " llill %[not_mapped],1\n" ++ " j 1f\n" ++ "0: sske %[key], %[abs]\n" ++ "1:" ++ : [abs] "+&a" (abs), [not_mapped] "+r" (not_mapped) ++ : [key] "r" (key) ++ : "cc" ++ ); ++ GUEST_ASSERT_EQ(not_mapped, 0); ++ } ++} ++ ++static void guest_copy_key(void) ++{ ++ set_storage_key_range(mem1, sizeof(mem1), 0x90); ++ set_storage_key_range(mem2, sizeof(mem2), 0x90); ++ GUEST_SYNC(STAGE_SKEYS_SET); ++ ++ for (;;) { ++ memcpy(&mem2, &mem1, sizeof(mem2)); ++ GUEST_SYNC(STAGE_COPIED); ++ } ++} ++ ++static void test_copy_key(void) ++{ ++ struct test_default t = test_default_init(guest_copy_key); ++ ++ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); ++ ++ /* vm, no key */ ++ DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, t.size); ++ ++ /* vm/vcpu, matching key or key 0 */ ++ DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size, KEY(0)); ++ DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size, KEY(9)); ++ DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, t.size, KEY(0)); ++ DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, t.size, KEY(9)); ++ /* ++ * There used to be different code paths for key handling depending on ++ * if the region crossed a page boundary. ++ * There currently are not, but the more tests the merrier. ++ */ ++ DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, 1, KEY(0)); ++ DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, 1, KEY(9)); ++ DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, 1, KEY(0)); ++ DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, 1, KEY(9)); ++ ++ /* vm/vcpu, mismatching keys on read, but no fetch protection */ ++ DEFAULT_READ(t.vcpu, t.vcpu, LOGICAL, t.size, GADDR_V(mem2), KEY(2)); ++ DEFAULT_READ(t.vcpu, t.vm, ABSOLUTE, t.size, GADDR_V(mem1), KEY(2)); ++ ++ kvm_vm_free(t.kvm_vm); ++} ++ ++static void guest_copy_key_fetch_prot(void) ++{ ++ /* ++ * For some reason combining the first sync with override enablement ++ * results in an exception when calling HOST_SYNC. ++ */ ++ GUEST_SYNC(STAGE_INITED); ++ /* Storage protection override applies to both store and fetch.
*/ ++ set_storage_key_range(mem1, sizeof(mem1), 0x98); ++ set_storage_key_range(mem2, sizeof(mem2), 0x98); ++ GUEST_SYNC(STAGE_SKEYS_SET); ++ ++ for (;;) { ++ memcpy(&mem2, &mem1, sizeof(mem2)); ++ GUEST_SYNC(STAGE_COPIED); ++ } ++} ++ ++static void test_copy_key_storage_prot_override(void) ++{ ++ struct test_default t = test_default_init(guest_copy_key_fetch_prot); + + HOST_SYNC(t.vcpu, STAGE_INITED); ++ t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE; ++ t.run->kvm_dirty_regs = KVM_SYNC_CRS; ++ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + +- /* Set the first array */ +- MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1)); ++ /* vcpu, mismatching keys, storage protection override in effect */ ++ DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size, KEY(2)); + +- /* Let the guest code copy the first array to the second */ +- HOST_SYNC(t.vcpu, STAGE_COPIED); ++ kvm_vm_free(t.kvm_vm); ++} + +- memset(mem2, 0xaa, sizeof(mem2)); ++static void test_copy_key_fetch_prot(void) ++{ ++ struct test_default t = test_default_init(guest_copy_key_fetch_prot); ++ ++ HOST_SYNC(t.vcpu, STAGE_INITED); ++ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); ++ ++ /* vm/vcpu, matching key, fetch protection in effect */ ++ DEFAULT_READ(t.vcpu, t.vcpu, LOGICAL, t.size, GADDR_V(mem2), KEY(9)); ++ DEFAULT_READ(t.vcpu, t.vm, ABSOLUTE, t.size, GADDR_V(mem2), KEY(9)); ++ ++ kvm_vm_free(t.kvm_vm); ++} + +- /* Get the second array */ +- MOP(t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem2)); ++const uint64_t last_page_addr = -PAGE_SIZE; + +- TEST_ASSERT(!memcmp(mem1, mem2, t.size), +- "Memory contents do not match!"); ++static void guest_copy_key_fetch_prot_override(void) ++{ ++ int i; ++ char *page_0 = 0; ++ ++ GUEST_SYNC(STAGE_INITED); ++ set_storage_key_range(0, PAGE_SIZE, 0x18); ++ set_storage_key_range((void *)last_page_addr, PAGE_SIZE, 0x0); ++ asm volatile ("sske %[key],%[addr]\n" :: [addr] "r"(0), [key] "r"(0x18) : "cc"); ++ GUEST_SYNC(STAGE_SKEYS_SET); ++ ++ for (;;) { ++ for (i = 0; i < PAGE_SIZE; i++) ++ page_0[i] = mem1[i]; ++ GUEST_SYNC(STAGE_COPIED); ++ } ++} ++ ++static void test_copy_key_fetch_prot_override(void) ++{ ++ struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); ++ vm_vaddr_t guest_0_page, guest_last_page; + ++ guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); ++ guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); ++ if (guest_0_page != 0 || guest_last_page != last_page_addr) { ++ print_skip("did not allocate guest pages at required positions"); ++ goto out; ++ } ++ ++ HOST_SYNC(t.vcpu, STAGE_INITED); ++ t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE; ++ t.run->kvm_dirty_regs = KVM_SYNC_CRS; ++ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); ++ ++ /* vcpu, mismatching keys on fetch, fetch protection override applies */ ++ prepare_mem12(); ++ MOP(t.vcpu, LOGICAL, WRITE, mem1, PAGE_SIZE, GADDR_V(mem1)); ++ HOST_SYNC(t.vcpu, STAGE_COPIED); ++ CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2)); ++ ASSERT_MEM_EQ(mem1, mem2, 2048); ++ ++ /* ++ * vcpu, mismatching keys on fetch, fetch protection override applies, ++ * wraparound ++ */ ++ prepare_mem12(); ++ MOP(t.vcpu, LOGICAL, WRITE, mem1, 2 * PAGE_SIZE, GADDR_V(guest_last_page)); ++ HOST_SYNC(t.vcpu, STAGE_COPIED); ++ CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048, ++ GADDR_V(guest_last_page), KEY(2)); ++ ASSERT_MEM_EQ(mem1, mem2, 2048); ++ ++out: + kvm_vm_free(t.kvm_vm); + } + +@@ -335,17 +543,26 @@ static void test_errors(void) + + int main(int argc, char *argv[]) + { 
+- int memop_cap; ++ int memop_cap, extension_cap; + + setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ + + memop_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP); ++ extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION); + if (!memop_cap) { + print_skip("CAP_S390_MEM_OP not supported"); + exit(KSFT_SKIP); + } + + test_copy(); ++ if (extension_cap > 0) { ++ test_copy_key(); ++ test_copy_key_storage_prot_override(); ++ test_copy_key_fetch_prot(); ++ test_copy_key_fetch_prot_override(); ++ } else { ++ print_skip("storage key memop extension not supported"); ++ } + test_errors(); + + return 0; diff --git a/patches.suse/KVM-s390-selftests-Add-named-stages-for-memop-test b/patches.suse/KVM-s390-selftests-Add-named-stages-for-memop-test new file mode 100644 index 0000000..db923ac --- /dev/null +++ b/patches.suse/KVM-s390-selftests-Add-named-stages-for-memop-test @@ -0,0 +1,110 @@ +From: Janis Schoetterl-Glausch +Date: Tue, 8 Mar 2022 13:58:39 +0100 +Subject: KVM: s390: selftests: Add named stages for memop test +Git-commit: c4816a1b7fed3c000b37ad6516f65aad8bc5fba6 +Patch-mainline: v5.18-rc1 +References: jsc#PED-579 + +The stages synchronize guest and host execution. +This helps the reader and constrains the execution of the test -- if the +observed stage differs from the expected one, the test fails. + +Signed-off-by: Janis Schoetterl-Glausch +Link: https://lore.kernel.org/r/20220308125841.3271721-4-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +Acked-by: Petr Tesarik +--- + tools/testing/selftests/kvm/s390x/memop.c | 44 ++++++++++++++++++++++-------- + 1 file changed, 33 insertions(+), 11 deletions(-) + +--- a/tools/testing/selftests/kvm/s390x/memop.c ++++ b/tools/testing/selftests/kvm/s390x/memop.c +@@ -218,10 +218,32 @@ static struct test_default test_default_ + return t; + } + ++enum stage { ++ /* Synced state set by host, e.g.
DAT */ ++ STAGE_INITED, ++ /* Guest did nothing */ ++ STAGE_IDLED, ++ /* Guest copied memory (locations up to test case) */ ++ STAGE_COPIED, ++}; ++ ++#define HOST_SYNC(vcpu_p, stage) \ ++({ \ ++ struct test_vcpu __vcpu = (vcpu_p); \ ++ struct ucall uc; \ ++ int __stage = (stage); \ ++ \ ++ vcpu_run(__vcpu.vm, __vcpu.id); \ ++ get_ucall(__vcpu.vm, __vcpu.id, &uc); \ ++ ASSERT_EQ(uc.cmd, UCALL_SYNC); \ ++ ASSERT_EQ(uc.args[1], __stage); \ ++}) \ ++ + static void guest_copy(void) + { ++ GUEST_SYNC(STAGE_INITED); + memcpy(&mem2, &mem1, sizeof(mem2)); +- GUEST_SYNC(0); ++ GUEST_SYNC(STAGE_COPIED); + } + + static void test_copy(void) +@@ -232,16 +254,13 @@ static void test_copy(void) + for (i = 0; i < sizeof(mem1); i++) + mem1[i] = i * i + i; + ++ HOST_SYNC(t.vcpu, STAGE_INITED); ++ + /* Set the first array */ +- MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, +- GADDR(addr_gva2gpa(t.kvm_vm, (uintptr_t)mem1))); ++ MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1)); + + /* Let the guest code copy the first array to the second */ +- vcpu_run(t.kvm_vm, VCPU_ID); +- TEST_ASSERT(t.run->exit_reason == KVM_EXIT_S390_SIEIC, +- "Unexpected exit reason: %u (%s)\n", +- t.run->exit_reason, +- exit_reason_str(t.run->exit_reason)); ++ HOST_SYNC(t.vcpu, STAGE_COPIED); + + memset(mem2, 0xaa, sizeof(mem2)); + +@@ -256,8 +275,9 @@ static void test_copy(void) + + static void guest_idle(void) + { ++ GUEST_SYNC(STAGE_INITED); /* for consistency's sake */ + for (;;) +- GUEST_SYNC(0); ++ GUEST_SYNC(STAGE_IDLED); + } + + static void test_errors(void) +@@ -265,6 +285,8 @@ static void test_errors(void) + struct test_default t = test_default_init(guest_idle); + int rv; + ++ HOST_SYNC(t.vcpu, STAGE_INITED); ++ + /* Bad size: */ + rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, -1, GADDR_V(mem1)); + TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes"); +@@ -294,11 +316,11 @@ static void test_errors(void) + /* Bad access register: */ + t.run->psw_mask &= ~(3UL << (63 - 17)); + t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */ +- vcpu_run(t.kvm_vm, VCPU_ID); /* To sync new state to SIE block */ ++ HOST_SYNC(t.vcpu, STAGE_IDLED); /* To sync new state to SIE block */ + rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), AR(17)); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15"); + t.run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */ +- vcpu_run(t.kvm_vm, VCPU_ID); /* Run to sync new state */ ++ HOST_SYNC(t.vcpu, STAGE_IDLED); /* Run to sync new state */ + + /* Check that the SIDA calls are rejected for non-protected guests */ + rv = ERR_MOP(t.vcpu, SIDA, READ, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0)); diff --git a/patches.suse/KVM-s390-selftests-Split-memop-tests b/patches.suse/KVM-s390-selftests-Split-memop-tests new file mode 100644 index 0000000..d177261 --- /dev/null +++ b/patches.suse/KVM-s390-selftests-Split-memop-tests @@ -0,0 +1,267 @@ +From: Janis Schoetterl-Glausch +Date: Tue, 8 Mar 2022 13:58:37 +0100 +Subject: KVM: s390: selftests: Split memop tests +Git-commit: 70e2f9f0390570077a3904f39c994b348ca6778d +Patch-mainline: v5.18-rc1 +References: jsc#PED-579 + +Split success case/copy test from error test, making them independent. +This means they do not share state and are easier to understand. +Also, new tests can be added in the same manner without affecting the old +ones. In order to make that simpler, introduce functionality for the +setup of commonly used variables.
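The shape this gives the file, as a minimal hedged sketch (the fixture names here are hypothetical, not the selftest's API): each case builds its own fixture through a shared init helper and tears it down itself, so cases share no state and can be added or reordered freely.

#include <assert.h>
#include <stdlib.h>

/* Hypothetical analogue of struct test_default: everything a case needs. */
struct fixture {
	unsigned char *buf;
	size_t size;
};

static struct fixture fixture_init(size_t size)
{
	struct fixture f = { .buf = calloc(size, 1), .size = size };

	assert(f.buf != NULL);
	return f;
}

/* Each case owns its fixture from init to free; nothing leaks between. */
static void test_one(void)
{
	struct fixture f = fixture_init(4096);

	f.buf[0] = 0x42;	/* exercise the fixture */
	assert(f.buf[0] == 0x42);
	free(f.buf);
}

static void test_two(void)
{
	struct fixture f = fixture_init(64);

	assert(f.buf[63] == 0);	/* fresh state, untouched by test_one() */
	free(f.buf);
}

int main(void)
{
	test_one();
	test_two();
	return 0;
}

In the actual patch, test_default_init() and kvm_vm_free() play the init and teardown roles.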
+ +Signed-off-by: Janis Schoetterl-Glausch +Link: https://lore.kernel.org/r/20220308125841.3271721-2-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +Acked-by: Petr Tesarik +--- + tools/testing/selftests/kvm/s390x/memop.c | 137 +++++++++++++++++------------- + 1 file changed, 82 insertions(+), 55 deletions(-) + +--- a/tools/testing/selftests/kvm/s390x/memop.c ++++ b/tools/testing/selftests/kvm/s390x/memop.c +@@ -18,71 +18,82 @@ + static uint8_t mem1[65536]; + static uint8_t mem2[65536]; + +-static void guest_code(void) ++struct test_default { ++ struct kvm_vm *kvm_vm; ++ struct kvm_run *run; ++ int size; ++}; ++ ++static struct test_default test_default_init(void *guest_code) + { +- int i; ++ struct test_default t; + +- for (;;) { +- for (i = 0; i < sizeof(mem2); i++) +- mem2[i] = mem1[i]; +- GUEST_SYNC(0); +- } ++ t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1)); ++ t.kvm_vm = vm_create_default(VCPU_ID, 0, guest_code); ++ t.run = vcpu_state(t.kvm_vm, VCPU_ID); ++ return t; + } + +-int main(int argc, char *argv[]) ++static void guest_copy(void) + { +- struct kvm_vm *vm; +- struct kvm_run *run; +- struct kvm_s390_mem_op ksmo; +- int rv, i, maxsize; +- +- setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ +- +- maxsize = kvm_check_cap(KVM_CAP_S390_MEM_OP); +- if (!maxsize) { +- print_skip("CAP_S390_MEM_OP not supported"); +- exit(KSFT_SKIP); +- } +- if (maxsize > sizeof(mem1)) +- maxsize = sizeof(mem1); ++ memcpy(&mem2, &mem1, sizeof(mem2)); ++ GUEST_SYNC(0); ++} + +- /* Create VM */ +- vm = vm_create_default(VCPU_ID, 0, guest_code); +- run = vcpu_state(vm, VCPU_ID); ++static void test_copy(void) ++{ ++ struct test_default t = test_default_init(guest_copy); ++ struct kvm_s390_mem_op ksmo; ++ int i; + + for (i = 0; i < sizeof(mem1); i++) + mem1[i] = i * i + i; + + /* Set the first array */ +- ksmo.gaddr = addr_gva2gpa(vm, (uintptr_t)mem1); ++ ksmo.gaddr = addr_gva2gpa(t.kvm_vm, (uintptr_t)mem1); + ksmo.flags = 0; +- ksmo.size = maxsize; ++ ksmo.size = t.size; + ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; + ksmo.buf = (uintptr_t)mem1; + ksmo.ar = 0; +- vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + + /* Let the guest code copy the first array to the second */ +- vcpu_run(vm, VCPU_ID); +- TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, ++ vcpu_run(t.kvm_vm, VCPU_ID); ++ TEST_ASSERT(t.run->exit_reason == KVM_EXIT_S390_SIEIC, + "Unexpected exit reason: %u (%s)\n", +- run->exit_reason, +- exit_reason_str(run->exit_reason)); ++ t.run->exit_reason, ++ exit_reason_str(t.run->exit_reason)); + + memset(mem2, 0xaa, sizeof(mem2)); + + /* Get the second array */ + ksmo.gaddr = (uintptr_t)mem2; + ksmo.flags = 0; +- ksmo.size = maxsize; ++ ksmo.size = t.size; + ksmo.op = KVM_S390_MEMOP_LOGICAL_READ; + ksmo.buf = (uintptr_t)mem2; + ksmo.ar = 0; +- vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + +- TEST_ASSERT(!memcmp(mem1, mem2, maxsize), ++ TEST_ASSERT(!memcmp(mem1, mem2, t.size), + "Memory contents do not match!"); + ++ kvm_vm_free(t.kvm_vm); ++} ++ ++static void guest_idle(void) ++{ ++ for (;;) ++ GUEST_SYNC(0); ++} ++ ++static void test_errors(void) ++{ ++ struct test_default t = test_default_init(guest_idle); ++ struct kvm_s390_mem_op ksmo; ++ int rv; ++ + /* Check error conditions - first bad size: */ + ksmo.gaddr = (uintptr_t)mem1; + ksmo.flags = 0; +@@ -90,7 +101,7 @@ int main(int argc, char *argv[]) + ksmo.op = 
KVM_S390_MEMOP_LOGICAL_WRITE; + ksmo.buf = (uintptr_t)mem1; + ksmo.ar = 0; +- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes"); + + /* Zero size: */ +@@ -100,65 +111,65 @@ int main(int argc, char *argv[]) + ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; + ksmo.buf = (uintptr_t)mem1; + ksmo.ar = 0; +- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM), + "ioctl allows 0 as size"); + + /* Bad flags: */ + ksmo.gaddr = (uintptr_t)mem1; + ksmo.flags = -1; +- ksmo.size = maxsize; ++ ksmo.size = t.size; + ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; + ksmo.buf = (uintptr_t)mem1; + ksmo.ar = 0; +- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags"); + + /* Bad operation: */ + ksmo.gaddr = (uintptr_t)mem1; + ksmo.flags = 0; +- ksmo.size = maxsize; ++ ksmo.size = t.size; + ksmo.op = -1; + ksmo.buf = (uintptr_t)mem1; + ksmo.ar = 0; +- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations"); + + /* Bad guest address: */ + ksmo.gaddr = ~0xfffUL; + ksmo.flags = KVM_S390_MEMOP_F_CHECK_ONLY; +- ksmo.size = maxsize; ++ ksmo.size = t.size; + ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; + ksmo.buf = (uintptr_t)mem1; + ksmo.ar = 0; +- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access"); + + /* Bad host address: */ + ksmo.gaddr = (uintptr_t)mem1; + ksmo.flags = 0; +- ksmo.size = maxsize; ++ ksmo.size = t.size; + ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; + ksmo.buf = 0; + ksmo.ar = 0; +- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv == -1 && errno == EFAULT, + "ioctl does not report bad host memory address"); + + /* Bad access register: */ +- run->psw_mask &= ~(3UL << (63 - 17)); +- run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */ +- vcpu_run(vm, VCPU_ID); /* To sync new state to SIE block */ ++ t.run->psw_mask &= ~(3UL << (63 - 17)); ++ t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */ ++ vcpu_run(t.kvm_vm, VCPU_ID); /* To sync new state to SIE block */ + ksmo.gaddr = (uintptr_t)mem1; + ksmo.flags = 0; +- ksmo.size = maxsize; ++ ksmo.size = t.size; + ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; + ksmo.buf = (uintptr_t)mem1; + ksmo.ar = 17; +- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15"); +- run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */ +- vcpu_run(vm, VCPU_ID); /* Run to sync new state */ ++ t.run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */ ++ vcpu_run(t.kvm_vm, VCPU_ID); /* Run to sync new state */ + + /* Check that the SIDA calls are rejected for non-protected guests */ + ksmo.gaddr = 0; +@@ -167,15 +178,31 @@ int main(int argc, char *argv[]) + ksmo.op = KVM_S390_MEMOP_SIDA_READ; + ksmo.buf = (uintptr_t)mem1; + ksmo.sida_offset = 0x1c0; +- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); 
++ rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv == -1 && errno == EINVAL, + "ioctl does not reject SIDA_READ in non-protected mode"); + ksmo.op = KVM_S390_MEMOP_SIDA_WRITE; +- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ rv = _vcpu_ioctl(t.kvm_vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv == -1 && errno == EINVAL, + "ioctl does not reject SIDA_WRITE in non-protected mode"); + +- kvm_vm_free(vm); ++ kvm_vm_free(t.kvm_vm); ++} ++ ++int main(int argc, char *argv[]) ++{ ++ int memop_cap; ++ ++ setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ ++ ++ memop_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP); ++ if (!memop_cap) { ++ print_skip("CAP_S390_MEM_OP not supported"); ++ exit(KSFT_SKIP); ++ } ++ ++ test_copy(); ++ test_errors(); + + return 0; + } diff --git a/patches.suse/KVM-s390-selftests-Test-TEST-PROTECTION-emulation b/patches.suse/KVM-s390-selftests-Test-TEST-PROTECTION-emulation new file mode 100644 index 0000000..e32ac2a --- /dev/null +++ b/patches.suse/KVM-s390-selftests-Test-TEST-PROTECTION-emulation @@ -0,0 +1,273 @@ +From: Janis Schoetterl-Glausch +Date: Fri, 11 Feb 2022 19:22:09 +0100 +Subject: KVM: s390: selftests: Test TEST PROTECTION emulation +Git-commit: c7ef9ebbed20b860f70cc7bece65622b570a8a93 +Patch-mainline: v5.18-rc1 +References: jsc#PED-579 + +Test the emulation of TEST PROTECTION in the presence of storage keys. +Emulation only occurs under certain conditions, one of which is the host +page being protected. +Trigger this by protecting the test pages via mprotect. + +Signed-off-by: Janis Schoetterl-Glausch +Reviewed-by: Janosch Frank +Link: https://lore.kernel.org/r/20220211182215.2730017-5-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +Acked-by: Petr Tesarik +--- + tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/s390x/tprot.c | 227 ++++++++++++++++++++++++++++++ + 3 files changed, 229 insertions(+) + +--- a/tools/testing/selftests/kvm/.gitignore ++++ b/tools/testing/selftests/kvm/.gitignore +@@ -5,6 +5,7 @@ + /s390x/memop + /s390x/resets + /s390x/sync_regs_test ++/s390x/tprot + /x86_64/cr4_cpuid_sync_test + /x86_64/debug_regs + /x86_64/evmcs_test +--- a/tools/testing/selftests/kvm/Makefile ++++ b/tools/testing/selftests/kvm/Makefile +@@ -101,6 +101,7 @@ TEST_GEN_PROGS_aarch64 += kvm_binary_sta + TEST_GEN_PROGS_s390x = s390x/memop + TEST_GEN_PROGS_s390x += s390x/resets + TEST_GEN_PROGS_s390x += s390x/sync_regs_test ++TEST_GEN_PROGS_s390x += s390x/tprot + TEST_GEN_PROGS_s390x += demand_paging_test + TEST_GEN_PROGS_s390x += dirty_log_test + TEST_GEN_PROGS_s390x += kvm_create_max_vcpus +--- /dev/null ++++ b/tools/testing/selftests/kvm/s390x/tprot.c +@@ -0,0 +1,227 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/* ++ * Test TEST PROTECTION emulation. ++ * ++ * Copyright IBM Corp. 
2021 ++ */ ++ ++#include <sys/mman.h> ++#include "test_util.h" ++#include "kvm_util.h" ++ ++#define PAGE_SHIFT 12 ++#define PAGE_SIZE (1 << PAGE_SHIFT) ++#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) ++#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) ++ ++#define VCPU_ID 1 ++ ++static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE]; ++static uint8_t *const page_store_prot = pages[0]; ++static uint8_t *const page_fetch_prot = pages[1]; ++ ++/* Nonzero return value indicates that address not mapped */ ++static int set_storage_key(void *addr, uint8_t key) ++{ ++ int not_mapped = 0; ++ ++ asm volatile ( ++ "lra %[addr], 0(0,%[addr])\n" ++ " jz 0f\n" ++ " llill %[not_mapped],1\n" ++ " j 1f\n" ++ "0: sske %[key], %[addr]\n" ++ "1:" ++ : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped) ++ : [key] "r" (key) ++ : "cc" ++ ); ++ return -not_mapped; ++} ++ ++enum permission { ++ READ_WRITE = 0, ++ READ = 1, ++ RW_PROTECTED = 2, ++ TRANSL_UNAVAIL = 3, ++}; ++ ++static enum permission test_protection(void *addr, uint8_t key) ++{ ++ uint64_t mask; ++ ++ asm volatile ( ++ "tprot %[addr], 0(%[key])\n" ++ " ipm %[mask]\n" ++ : [mask] "=r" (mask) ++ : [addr] "Q" (*(char *)addr), ++ [key] "a" (key) ++ : "cc" ++ ); ++ ++ return (enum permission)(mask >> 28); ++} ++ ++enum stage { ++ STAGE_END, ++ STAGE_INIT_SIMPLE, ++ TEST_SIMPLE, ++ STAGE_INIT_FETCH_PROT_OVERRIDE, ++ TEST_FETCH_PROT_OVERRIDE, ++ TEST_STORAGE_PROT_OVERRIDE, ++}; ++ ++struct test { ++ enum stage stage; ++ void *addr; ++ uint8_t key; ++ enum permission expected; ++} tests[] = { ++ /* ++ * We perform each test in the array by executing TEST PROTECTION on ++ * the specified addr with the specified key and checking if the returned ++ * permissions match the expected value. ++ * Both guest and host cooperate to set up the required test conditions. ++ * A central condition is that the page targeted by addr has to be DAT ++ * protected in the host mappings, in order for KVM to emulate the ++ * TEST PROTECTION instruction. ++ * Since the page tables are shared, the host uses mprotect to achieve ++ * this. ++ * ++ * Tests resulting in RW_PROTECTED/TRANSL_UNAVAIL will be interpreted ++ * by SIE, not KVM, but there is no harm in testing them also.
++ * See Enhanced Suppression-on-Protection Facilities in the ++ * Interpretive-Execution Mode ++ */ ++ /* ++ * guest: set storage key of page_store_prot to 1 ++ * storage key of page_fetch_prot to 9 and enable ++ * protection for it ++ * STAGE_INIT_SIMPLE ++ * host: write protect both via mprotect ++ */ ++ /* access key 0 matches any storage key -> RW */ ++ { TEST_SIMPLE, page_store_prot, 0x00, READ_WRITE }, ++ /* access key matches storage key -> RW */ ++ { TEST_SIMPLE, page_store_prot, 0x10, READ_WRITE }, ++ /* mismatched keys, but no fetch protection -> RO */ ++ { TEST_SIMPLE, page_store_prot, 0x20, READ }, ++ /* access key 0 matches any storage key -> RW */ ++ { TEST_SIMPLE, page_fetch_prot, 0x00, READ_WRITE }, ++ /* access key matches storage key -> RW */ ++ { TEST_SIMPLE, page_fetch_prot, 0x90, READ_WRITE }, ++ /* mismatched keys, fetch protection -> inaccessible */ ++ { TEST_SIMPLE, page_fetch_prot, 0x10, RW_PROTECTED }, ++ /* page 0 not mapped yet -> translation not available */ ++ { TEST_SIMPLE, (void *)0x00, 0x10, TRANSL_UNAVAIL }, ++ /* ++ * host: try to map page 0 ++ * guest: set storage key of page 0 to 9 and enable fetch protection ++ * STAGE_INIT_FETCH_PROT_OVERRIDE ++ * host: write protect page 0 ++ * enable fetch protection override ++ */ ++ /* mismatched keys, fetch protection, but override applies -> RO */ ++ { TEST_FETCH_PROT_OVERRIDE, (void *)0x00, 0x10, READ }, ++ /* mismatched keys, fetch protection, override applies to 0-2048 only -> inaccessible */ ++ { TEST_FETCH_PROT_OVERRIDE, (void *)2049, 0x10, RW_PROTECTED }, ++ /* ++ * host: enable storage protection override ++ */ ++ /* mismatched keys, but override applies (storage key 9) -> RW */ ++ { TEST_STORAGE_PROT_OVERRIDE, page_fetch_prot, 0x10, READ_WRITE }, ++ /* mismatched keys, no fetch protection, override doesn't apply -> RO */ ++ { TEST_STORAGE_PROT_OVERRIDE, page_store_prot, 0x20, READ }, ++ /* mismatched keys, but override applies (storage key 9) -> RW */ ++ { TEST_STORAGE_PROT_OVERRIDE, (void *)2049, 0x10, READ_WRITE }, ++ /* end marker */ ++ { STAGE_END, 0, 0, 0 }, ++}; ++ ++static enum stage perform_next_stage(int *i, bool mapped_0) ++{ ++ enum stage stage = tests[*i].stage; ++ enum permission result; ++ bool skip; ++ ++ for (; tests[*i].stage == stage; (*i)++) { ++ /* ++ * Some fetch protection override tests require that page 0 ++ * be mapped, however, when the host tries to map that page via ++ * vm_vaddr_alloc, it may happen that some other page gets mapped ++ * instead.
++ * In order to skip these tests we detect this inside the guest ++ */ ++ skip = tests[*i].addr < (void *)4096 && ++ tests[*i].expected != TRANSL_UNAVAIL && ++ !mapped_0; ++ if (!skip) { ++ result = test_protection(tests[*i].addr, tests[*i].key); ++ GUEST_ASSERT_2(result == tests[*i].expected, *i, result); ++ } ++ } ++ return stage; ++} ++ ++static void guest_code(void) ++{ ++ bool mapped_0; ++ int i = 0; ++ ++ GUEST_ASSERT_EQ(set_storage_key(page_store_prot, 0x10), 0); ++ GUEST_ASSERT_EQ(set_storage_key(page_fetch_prot, 0x98), 0); ++ GUEST_SYNC(STAGE_INIT_SIMPLE); ++ GUEST_SYNC(perform_next_stage(&i, false)); ++ ++ /* Fetch-protection override */ ++ mapped_0 = !set_storage_key((void *)0, 0x98); ++ GUEST_SYNC(STAGE_INIT_FETCH_PROT_OVERRIDE); ++ GUEST_SYNC(perform_next_stage(&i, mapped_0)); ++ ++ /* Storage-protection override */ ++ GUEST_SYNC(perform_next_stage(&i, mapped_0)); ++} ++ ++#define HOST_SYNC(vmp, stage) \ ++({ \ ++ struct kvm_vm *__vm = (vmp); \ ++ struct ucall uc; \ ++ int __stage = (stage); \ ++ \ ++ vcpu_run(__vm, VCPU_ID); \ ++ get_ucall(__vm, VCPU_ID, &uc); \ ++ if (uc.cmd == UCALL_ABORT) { \ ++ TEST_FAIL("line %lu: %s, hints: %lu, %lu", uc.args[1], \ ++ (const char *)uc.args[0], uc.args[2], uc.args[3]); \ ++ } \ ++ ASSERT_EQ(uc.cmd, UCALL_SYNC); \ ++ ASSERT_EQ(uc.args[1], __stage); \ ++}) ++ ++int main(int argc, char *argv[]) ++{ ++ struct kvm_vm *vm; ++ struct kvm_run *run; ++ vm_vaddr_t guest_0_page; ++ ++ vm = vm_create_default(VCPU_ID, 0, guest_code); ++ run = vcpu_state(vm, VCPU_ID); ++ ++ HOST_SYNC(vm, STAGE_INIT_SIMPLE); ++ mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ); ++ HOST_SYNC(vm, TEST_SIMPLE); ++ ++ guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0); ++ if (guest_0_page != 0) ++ print_skip("Did not allocate page at 0 for fetch protection override tests"); ++ HOST_SYNC(vm, STAGE_INIT_FETCH_PROT_OVERRIDE); ++ if (guest_0_page == 0) ++ mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ); ++ run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE; ++ run->kvm_dirty_regs = KVM_SYNC_CRS; ++ HOST_SYNC(vm, TEST_FETCH_PROT_OVERRIDE); ++ ++ run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE; ++ run->kvm_dirty_regs = KVM_SYNC_CRS; ++ HOST_SYNC(vm, TEST_STORAGE_PROT_OVERRIDE); ++} diff --git a/patches.suse/MIPS-Loongson-Use-hwmon_device_register_with_groups-.patch b/patches.suse/MIPS-Loongson-Use-hwmon_device_register_with_groups-.patch new file mode 100644 index 0000000..b767bd1 --- /dev/null +++ b/patches.suse/MIPS-Loongson-Use-hwmon_device_register_with_groups-.patch @@ -0,0 +1,212 @@ +From abae018a03821be2b65c01ebe2bef06fd7d85a4c Mon Sep 17 00:00:00 2001 +From: Guenter Roeck +Date: Wed, 11 May 2022 07:56:59 -0700 +Subject: [PATCH] MIPS: Loongson: Use hwmon_device_register_with_groups() to + register hwmon +Git-commit: abae018a03821be2b65c01ebe2bef06fd7d85a4c +References: git-fixes +Patch-mainline: v5.19-rc1 + +Calling hwmon_device_register_with_info() with NULL dev and/or chip +information parameters is an ABI abuse and not a real conversion to +the new API. Also, the code creates sysfs attributes _after_ creating +the hwmon device, which is racy and unsupported to start with. On top +of that, the removal code tries to remove the name attribute which is +owned by the hwmon core. + +Use hwmon_device_register_with_groups() to register the hwmon device +instead. + +In the future, the hwmon subsystem will reject calls to +hwmon_device_register_with_info with NULL dev or chip/info parameters. 
+Without this patch, the hwmon device will fail to register. + +Fixes: f59dc5119192 ("MIPS: Loongson: Fix boot warning about hwmon_device_register()") +Cc: Zhi Li +Signed-off-by: Guenter Roeck +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Oliver Neukum +--- + drivers/platform/mips/cpu_hwmon.c | 127 ++++++++++-------------------- + 1 file changed, 41 insertions(+), 86 deletions(-) + +diff --git a/drivers/platform/mips/cpu_hwmon.c b/drivers/platform/mips/cpu_hwmon.c +index 386389ffec41..d8c5f9195f85 100644 +--- a/drivers/platform/mips/cpu_hwmon.c ++++ b/drivers/platform/mips/cpu_hwmon.c +@@ -55,55 +55,6 @@ int loongson3_cpu_temp(int cpu) + static int nr_packages; + static struct device *cpu_hwmon_dev; + +-static SENSOR_DEVICE_ATTR(name, 0444, NULL, NULL, 0); +- +-static struct attribute *cpu_hwmon_attributes[] = { +- &sensor_dev_attr_name.dev_attr.attr, +- NULL +-}; +- +-/* Hwmon device attribute group */ +-static struct attribute_group cpu_hwmon_attribute_group = { +- .attrs = cpu_hwmon_attributes, +-}; +- +-static ssize_t get_cpu_temp(struct device *dev, +- struct device_attribute *attr, char *buf); +-static ssize_t cpu_temp_label(struct device *dev, +- struct device_attribute *attr, char *buf); +- +-static SENSOR_DEVICE_ATTR(temp1_input, 0444, get_cpu_temp, NULL, 1); +-static SENSOR_DEVICE_ATTR(temp1_label, 0444, cpu_temp_label, NULL, 1); +-static SENSOR_DEVICE_ATTR(temp2_input, 0444, get_cpu_temp, NULL, 2); +-static SENSOR_DEVICE_ATTR(temp2_label, 0444, cpu_temp_label, NULL, 2); +-static SENSOR_DEVICE_ATTR(temp3_input, 0444, get_cpu_temp, NULL, 3); +-static SENSOR_DEVICE_ATTR(temp3_label, 0444, cpu_temp_label, NULL, 3); +-static SENSOR_DEVICE_ATTR(temp4_input, 0444, get_cpu_temp, NULL, 4); +-static SENSOR_DEVICE_ATTR(temp4_label, 0444, cpu_temp_label, NULL, 4); +- +-static const struct attribute *hwmon_cputemp[4][3] = { +- { +- &sensor_dev_attr_temp1_input.dev_attr.attr, +- &sensor_dev_attr_temp1_label.dev_attr.attr, +- NULL +- }, +- { +- &sensor_dev_attr_temp2_input.dev_attr.attr, +- &sensor_dev_attr_temp2_label.dev_attr.attr, +- NULL +- }, +- { +- &sensor_dev_attr_temp3_input.dev_attr.attr, +- &sensor_dev_attr_temp3_label.dev_attr.attr, +- NULL +- }, +- { +- &sensor_dev_attr_temp4_input.dev_attr.attr, +- &sensor_dev_attr_temp4_label.dev_attr.attr, +- NULL +- } +-}; +- + static ssize_t cpu_temp_label(struct device *dev, + struct device_attribute *attr, char *buf) + { +@@ -121,24 +72,47 @@ static ssize_t get_cpu_temp(struct device *dev, + return sprintf(buf, "%d\n", value); + } + +-static int create_sysfs_cputemp_files(struct kobject *kobj) +-{ +- int i, ret = 0; +- +- for (i = 0; i < nr_packages; i++) +- ret = sysfs_create_files(kobj, hwmon_cputemp[i]); ++static SENSOR_DEVICE_ATTR(temp1_input, 0444, get_cpu_temp, NULL, 1); ++static SENSOR_DEVICE_ATTR(temp1_label, 0444, cpu_temp_label, NULL, 1); ++static SENSOR_DEVICE_ATTR(temp2_input, 0444, get_cpu_temp, NULL, 2); ++static SENSOR_DEVICE_ATTR(temp2_label, 0444, cpu_temp_label, NULL, 2); ++static SENSOR_DEVICE_ATTR(temp3_input, 0444, get_cpu_temp, NULL, 3); ++static SENSOR_DEVICE_ATTR(temp3_label, 0444, cpu_temp_label, NULL, 3); ++static SENSOR_DEVICE_ATTR(temp4_input, 0444, get_cpu_temp, NULL, 4); ++static SENSOR_DEVICE_ATTR(temp4_label, 0444, cpu_temp_label, NULL, 4); + +- return ret; +-} ++static struct attribute *cpu_hwmon_attributes[] = { ++ &sensor_dev_attr_temp1_input.dev_attr.attr, ++ &sensor_dev_attr_temp1_label.dev_attr.attr, ++ &sensor_dev_attr_temp2_input.dev_attr.attr, ++ &sensor_dev_attr_temp2_label.dev_attr.attr, ++ 
&sensor_dev_attr_temp3_input.dev_attr.attr, ++ &sensor_dev_attr_temp3_label.dev_attr.attr, ++ &sensor_dev_attr_temp4_input.dev_attr.attr, ++ &sensor_dev_attr_temp4_label.dev_attr.attr, ++ NULL ++}; + +-static void remove_sysfs_cputemp_files(struct kobject *kobj) ++static umode_t cpu_hwmon_is_visible(struct kobject *kobj, ++ struct attribute *attr, int i) + { +- int i; ++ int id = i / 2; + +- for (i = 0; i < nr_packages; i++) +- sysfs_remove_files(kobj, hwmon_cputemp[i]); ++ if (id < nr_packages) ++ return attr->mode; ++ return 0; + } + ++static struct attribute_group cpu_hwmon_group = { ++ .attrs = cpu_hwmon_attributes, ++ .is_visible = cpu_hwmon_is_visible, ++}; ++ ++static const struct attribute_group *cpu_hwmon_groups[] = { ++ &cpu_hwmon_group, ++ NULL ++}; ++ + #define CPU_THERMAL_THRESHOLD 90000 + static struct delayed_work thermal_work; + +@@ -159,50 +133,31 @@ static void do_thermal_timer(struct work_struct *work) + + static int __init loongson_hwmon_init(void) + { +- int ret; +- + pr_info("Loongson Hwmon Enter...\n"); + + if (cpu_has_csr()) + csr_temp_enable = csr_readl(LOONGSON_CSR_FEATURES) & + LOONGSON_CSRF_TEMP; + +- cpu_hwmon_dev = hwmon_device_register_with_info(NULL, "cpu_hwmon", NULL, NULL, NULL); +- if (IS_ERR(cpu_hwmon_dev)) { +- ret = PTR_ERR(cpu_hwmon_dev); +- pr_err("hwmon_device_register fail!\n"); +- goto fail_hwmon_device_register; +- } +- + nr_packages = loongson_sysconf.nr_cpus / + loongson_sysconf.cores_per_package; + +- ret = create_sysfs_cputemp_files(&cpu_hwmon_dev->kobj); +- if (ret) { +- pr_err("fail to create cpu temperature interface!\n"); +- goto fail_create_sysfs_cputemp_files; ++ cpu_hwmon_dev = hwmon_device_register_with_groups(NULL, "cpu_hwmon", ++ NULL, cpu_hwmon_groups); ++ if (IS_ERR(cpu_hwmon_dev)) { ++ pr_err("hwmon_device_register fail!\n"); ++ return PTR_ERR(cpu_hwmon_dev); + } + + INIT_DEFERRABLE_WORK(&thermal_work, do_thermal_timer); + schedule_delayed_work(&thermal_work, msecs_to_jiffies(20000)); + +- return ret; +- +-fail_create_sysfs_cputemp_files: +- sysfs_remove_group(&cpu_hwmon_dev->kobj, +- &cpu_hwmon_attribute_group); +- hwmon_device_unregister(cpu_hwmon_dev); +- +-fail_hwmon_device_register: +- return ret; ++ return 0; + } + + static void __exit loongson_hwmon_exit(void) + { + cancel_delayed_work_sync(&thermal_work); +- remove_sysfs_cputemp_files(&cpu_hwmon_dev->kobj); +- sysfs_remove_group(&cpu_hwmon_dev->kobj, +- &cpu_hwmon_attribute_group); + hwmon_device_unregister(cpu_hwmon_dev); + } + +-- +2.35.3 + diff --git a/patches.suse/NFSD-Cap-rsize_bop-result-based-on-send-buffer-size.patch b/patches.suse/NFSD-Cap-rsize_bop-result-based-on-send-buffer-size.patch new file mode 100644 index 0000000..b889377 --- /dev/null +++ b/patches.suse/NFSD-Cap-rsize_bop-result-based-on-send-buffer-size.patch @@ -0,0 +1,138 @@ +From: Chuck Lever +Date: Thu, 1 Sep 2022 15:29:55 -0400 +Subject: [PATCH] NFSD: Cap rsize_bop result based on send buffer size +Git-commit: 76ce4dcec0dc08a032db916841ddc4e3998be317 +Patch-mainline: v6.1 +References: bsc#1205128 CVE-2022-43945 + +Since before the git era, NFSD has conserved the number of pages +held by each nfsd thread by combining the RPC receive and send +buffers into a single array of pages. This works because there are +no cases where an operation needs a large RPC Call message and a +large RPC Reply at the same time. + +Once an RPC Call has been received, svc_process() updates +svc_rqst::rq_res to describe the part of rq_pages that can be +used for constructing the Reply. 
This means that the send buffer +(rq_res) shrinks when the received RPC record containing the RPC +Call is large. + +Add an NFSv4 helper that computes the size of the send buffer. It +replaces svc_max_payload() in spots where svc_max_payload() returns +a value that might be larger than the remaining send buffer space. +Callers who need to know the transport's actual maximum payload size +will continue to use svc_max_payload(). + +Signed-off-by: Chuck Lever +Acked-by: NeilBrown + +--- + fs/nfsd/nfs4proc.c | 48 ++++++++++++++++++++++++------------------------ + 1 file changed, 24 insertions(+), 24 deletions(-) + +--- a/fs/nfsd/nfs4proc.c ++++ b/fs/nfsd/nfs4proc.c +@@ -2623,6 +2623,22 @@ out: + + #define op_encode_channel_attrs_maxsz (6 + 1 + 1) + ++/* ++ * The _rsize() helpers are invoked by the NFSv4 COMPOUND decoder, which ++ * is called before sunrpc sets rq_res.buflen. Thus we have to compute ++ * the maximum payload size here, based on transport limits and the size ++ * of the remaining space in the rq_pages array. ++ */ ++static u32 nfsd4_max_payload(const struct svc_rqst *rqstp) ++{ ++ u32 buflen; ++ ++ buflen = (rqstp->rq_page_end - rqstp->rq_next_page) * PAGE_SIZE; ++ buflen -= rqstp->rq_auth_slack; ++ buflen -= rqstp->rq_res.head[0].iov_len; ++ return min_t(u32, buflen, svc_max_payload(rqstp)); ++} ++ + static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) + { + return (op_encode_hdr_size) * sizeof(__be32); +@@ -2663,9 +2679,9 @@ static inline u32 nfsd4_getattr_rsize(st + u32 ret = 0; + + if (bmap0 & FATTR4_WORD0_ACL) +- return svc_max_payload(rqstp); ++ return nfsd4_max_payload(rqstp); + if (bmap0 & FATTR4_WORD0_FS_LOCATIONS) +- return svc_max_payload(rqstp); ++ return nfsd4_max_payload(rqstp); + + if (bmap1 & FATTR4_WORD1_OWNER) { + ret += IDMAP_NAMESZ + 4; +@@ -2720,18 +2736,14 @@ static inline u32 nfsd4_open_rsize(struc + + static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) + { +- u32 maxcount = 0, rlen = 0; +- +- maxcount = svc_max_payload(rqstp); +- rlen = min(op->u.read.rd_length, maxcount); ++ u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp)); + + return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); + } + + static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) + { +- u32 maxcount = svc_max_payload(rqstp); +- u32 rlen = min(op->u.read.rd_length, maxcount); ++ u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp)); + /* + * If we detect that the file changed during hole encoding, then we + * recover by encoding the remaining reply as data. 
This means we need +@@ -2744,10 +2756,7 @@ static inline u32 nfsd4_read_plus_rsize( + + static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) + { +- u32 maxcount = 0, rlen = 0; +- +- maxcount = svc_max_payload(rqstp); +- rlen = min(op->u.readdir.rd_maxcount, maxcount); ++ u32 rlen = min(op->u.readdir.rd_maxcount, nfsd4_max_payload(rqstp)); + + return (op_encode_hdr_size + op_encode_verifier_maxsz + + XDR_QUADLEN(rlen)) * sizeof(__be32); +@@ -2873,10 +2882,7 @@ static inline u32 nfsd4_copy_notify_rsiz + #ifdef CONFIG_NFSD_PNFS + static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) + { +- u32 maxcount = 0, rlen = 0; +- +- maxcount = svc_max_payload(rqstp); +- rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount); ++ u32 rlen = min(op->u.getdeviceinfo.gd_maxcount, nfsd4_max_payload(rqstp)); + + return (op_encode_hdr_size + + 1 /* gd_layout_type*/ + +@@ -2922,10 +2928,7 @@ static inline u32 nfsd4_seek_rsize(struc + static inline u32 nfsd4_getxattr_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) + { +- u32 maxcount, rlen; +- +- maxcount = svc_max_payload(rqstp); +- rlen = min_t(u32, XATTR_SIZE_MAX, maxcount); ++ u32 rlen = min_t(u32, XATTR_SIZE_MAX, nfsd4_max_payload(rqstp)); + + return (op_encode_hdr_size + 1 + XDR_QUADLEN(rlen)) * sizeof(__be32); + } +@@ -2939,10 +2942,7 @@ static inline u32 nfsd4_setxattr_rsize(s + static inline u32 nfsd4_listxattrs_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) + { +- u32 maxcount, rlen; +- +- maxcount = svc_max_payload(rqstp); +- rlen = min(op->u.listxattrs.lsxa_maxcount, maxcount); ++ u32 rlen = min(op->u.listxattrs.lsxa_maxcount, nfsd4_max_payload(rqstp)); + + return (op_encode_hdr_size + 4 + XDR_QUADLEN(rlen)) * sizeof(__be32); + } diff --git a/patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv2-R.patch b/patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv2-R.patch new file mode 100644 index 0000000..68a48e4 --- /dev/null +++ b/patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv2-R.patch @@ -0,0 +1,43 @@ +From: Chuck Lever +Date: Thu, 1 Sep 2022 15:10:18 -0400 +Subject: [PATCH] NFSD: Protect against send buffer overflow in NFSv2 READ +Git-commit: 401bc1f90874280a80b93f23be33a0e7e2d1f912 +Patch-mainline: v6.1 +References: bsc#1205128 CVE-2022-43945 + +Since before the git era, NFSD has conserved the number of pages +held by each nfsd thread by combining the RPC receive and send +buffers into a single array of pages. This works because there are +no cases where an operation needs a large RPC Call message and a +large RPC Reply at the same time. + +Once an RPC Call has been received, svc_process() updates +svc_rqst::rq_res to describe the part of rq_pages that can be +used for constructing the Reply. This means that the send buffer +(rq_res) shrinks when the received RPC record containing the RPC +Call is large. + +A client can force this shrinkage on TCP by sending a correctly- +formed RPC Call header contained in an RPC record that is +excessively large. The full maximum payload size cannot be +constructed in that case. 
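The underlying arithmetic, as a hedged sketch (a hypothetical helper, not kernel code): pages consumed by the incoming Call record are unavailable for the Reply, so the usable payload is the smaller of the transport maximum and the space that remains.

#include <stdio.h>

#define PAGE_SZ 4096u

/* Hypothetical model of the shared receive/send page array. */
static unsigned int reply_space(unsigned int total_pages,
				unsigned int call_pages,
				unsigned int max_payload)
{
	unsigned int left = (total_pages - call_pages) * PAGE_SZ;

	return left < max_payload ? left : max_payload;
}

int main(void)
{
	/* A small Call leaves the full payload available... */
	printf("%u\n", reply_space(260, 2, 1048576));	/* 1048576 */
	/* ...while an oversized Call record shrinks the Reply buffer. */
	printf("%u\n", reply_space(260, 258, 1048576));	/* 8192 */
	return 0;
}

Clamping the client-supplied count against both limits, as the hunk below does, keeps an oversized Call record from pushing the Reply past the shrunken buffer.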
+ +Cc: +Signed-off-by: Chuck Lever +Reviewed-by: Jeff Layton +Acked-by: NeilBrown + +--- + fs/nfsd/nfsproc.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/nfsd/nfsproc.c ++++ b/fs/nfsd/nfsproc.c +@@ -182,6 +182,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) + argp->count, argp->offset); + + argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2); ++ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); + + v = 0; + len = argp->count; diff --git a/patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv2-Rdir.patch b/patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv2-Rdir.patch new file mode 100644 index 0000000..625a617 --- /dev/null +++ b/patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv2-Rdir.patch @@ -0,0 +1,36 @@ +From: Chuck Lever +Date: Thu, 1 Sep 2022 15:10:05 -0400 +Subject: [PATCH] NFSD: Protect against send buffer overflow in NFSv2 READDIR +Git-commit: 00b4492686e0497fdb924a9d4c8f6f99377e176c +Patch-mainline: v6.1 +References: bsc#1205128 CVE-2022-43945 + +Restore the previous limit on the @count argument to prevent a +buffer overflow attack. + +Fixes: 53b1119a6e50 ("NFSD: Fix READDIR buffer overflow") +Signed-off-by: Chuck Lever +Reviewed-by: Jeff Layton +Acked-by: NeilBrown + +--- + fs/nfsd/nfsproc.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/fs/nfsd/nfsproc.c ++++ b/fs/nfsd/nfsproc.c +@@ -557,12 +557,11 @@ static void nfsd_init_dirlist_pages(stru + struct xdr_buf *buf = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; + +- count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp)); +- + memset(buf, 0, sizeof(*buf)); + + /* Reserve room for the NULL ptr & eof flag (-2 words) */ +- buf->buflen = count - XDR_UNIT * 2; ++ buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), (u32)PAGE_SIZE); ++ buf->buflen -= XDR_UNIT * 2; + buf->pages = rqstp->rq_next_page; + rqstp->rq_next_page++; + diff --git a/patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv3-R.patch b/patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv3-R.patch new file mode 100644 index 0000000..a191ef9 --- /dev/null +++ b/patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv3-R.patch @@ -0,0 +1,53 @@ +From: Chuck Lever +Date: Thu, 1 Sep 2022 15:10:24 -0400 +Subject: [PATCH] NFSD: Protect against send buffer overflow in NFSv3 READ +Git-commit: fa6be9cc6e80ec79892ddf08a8c10cabab9baf38 +Patch-mainline: v6.1 +References: bsc#1205128 CVE-2022-43945 + +Since before the git era, NFSD has conserved the number of pages +held by each nfsd thread by combining the RPC receive and send +buffers into a single array of pages. This works because there are +no cases where an operation needs a large RPC Call message and a +large RPC Reply at the same time. + +Once an RPC Call has been received, svc_process() updates +svc_rqst::rq_res to describe the part of rq_pages that can be +used for constructing the Reply. This means that the send buffer +(rq_res) shrinks when the received RPC record containing the RPC +Call is large. + +A client can force this shrinkage on TCP by sending a correctly- +formed RPC Call header contained in an RPC record that is +excessively large. The full maximum payload size cannot be +constructed in that case. 
+ +Cc: +Signed-off-by: Chuck Lever +Reviewed-by: Jeff Layton +Acked-by: NeilBrown + +--- + fs/nfsd/nfs3proc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/nfsd/nfs3proc.c ++++ b/fs/nfsd/nfs3proc.c +@@ -146,7 +146,6 @@ nfsd3_proc_read(struct svc_rqst *rqstp) + { + struct nfsd3_readargs *argp = rqstp->rq_argp; + struct nfsd3_readres *resp = rqstp->rq_resp; +- u32 max_blocksize = svc_max_payload(rqstp); + unsigned int len; + int v; + +@@ -155,7 +154,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp) + (unsigned long) argp->count, + (unsigned long long) argp->offset); + +- argp->count = min_t(u32, argp->count, max_blocksize); ++ argp->count = min_t(u32, argp->count, svc_max_payload(rqstp)); ++ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); + if (argp->offset > (u64)OFFSET_MAX) + argp->offset = (u64)OFFSET_MAX; + if (argp->offset + argp->count > (u64)OFFSET_MAX) diff --git a/patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv3-Rdir.patch b/patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv3-Rdir.patch new file mode 100644 index 0000000..eabacb8 --- /dev/null +++ b/patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv3-Rdir.patch @@ -0,0 +1,57 @@ +From: Chuck Lever +Date: Thu, 1 Sep 2022 15:10:12 -0400 +Subject: [PATCH] NFSD: Protect against send buffer overflow in NFSv3 READDIR +Git-commit: 640f87c190e0d1b2a0fcb2ecf6d2cd53b1c41991 +Patch-mainline: v6.1 +References: bsc#1205128 CVE-2022-43945 + +Since before the git era, NFSD has conserved the number of pages +held by each nfsd thread by combining the RPC receive and send +buffers into a single array of pages. This works because there are +no cases where an operation needs a large RPC Call message and a +large RPC Reply message at the same time. + +Once an RPC Call has been received, svc_process() updates +svc_rqst::rq_res to describe the part of rq_pages that can be +used for constructing the Reply. This means that the send buffer +(rq_res) shrinks when the received RPC record containing the RPC +Call is large. + +A client can force this shrinkage on TCP by sending a correctly- +formed RPC Call header contained in an RPC record that is +excessively large. The full maximum payload size cannot be +constructed in that case. + +Thanks to Aleksi Illikainen and Kari Hulkko for uncovering this +issue. 
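
For READDIR the upper bound is the smaller of the residual send buffer and
the transport maximum, and the result also sizes the page reservation. A
condensed sketch (field names follow the hunk below; the helper itself is
illustrative):

    static unsigned int nfsd3_dirlist_pages_sketch(struct svc_rqst *rqstp,
                                                   u32 count)
    {
            unsigned int sendbuf = min_t(unsigned int, rqstp->rq_res.buflen,
                                         svc_max_payload(rqstp));
            u32 buflen;

            buflen = clamp(count, (u32)(XDR_UNIT * 2), sendbuf) - XDR_UNIT * 2;
            /* round up to whole ring pages, as the hunk below does */
            return (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
    }
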
+ +Reported-by: Ben Ronallo +Cc: +Signed-off-by: Chuck Lever +Reviewed-by: Jeff Layton +Acked-by: NeilBrown + +--- + fs/nfsd/nfs3proc.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/fs/nfsd/nfs3proc.c ++++ b/fs/nfsd/nfs3proc.c +@@ -448,13 +448,14 @@ static void nfsd3_init_dirlist_pages(str + { + struct xdr_buf *buf = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; +- +- count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp)); ++ unsigned int sendbuf = min_t(unsigned int, rqstp->rq_res.buflen, ++ svc_max_payload(rqstp)); + + memset(buf, 0, sizeof(*buf)); + + /* Reserve room for the NULL ptr & eof flag (-2 words) */ +- buf->buflen = count - XDR_UNIT * 2; ++ buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), sendbuf); ++ buf->buflen -= XDR_UNIT * 2; + buf->pages = rqstp->rq_next_page; + rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; + diff --git a/patches.suse/Only-output-backtracking-information-in-log-level-2.patch b/patches.suse/Only-output-backtracking-information-in-log-level-2.patch new file mode 100644 index 0000000..0a5ab91 --- /dev/null +++ b/patches.suse/Only-output-backtracking-information-in-log-level-2.patch @@ -0,0 +1,48 @@ +From: Christy Lee +Date: Thu, 16 Dec 2021 13:33:58 -0800 +Subject: Only output backtracking information in log level 2 +Patch-mainline: v5.17-rc1 +Git-commit: 496f3324048b6ce024af19ac84a03f6bdef93b7d +References: jsc#PED-1368 + +Backtracking information is very verbose, don't print it in log +level 1 to improve readability. + +Signed-off-by: Christy Lee +Signed-off-by: Alexei Starovoitov +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211216213358.3374427-4-christylee@fb.com +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/verifier.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -2404,7 +2404,7 @@ static int backtrack_insn(struct bpf_ver + + if (insn->code == 0) + return 0; +- if (env->log.level & BPF_LOG_LEVEL) { ++ if (env->log.level & BPF_LOG_LEVEL2) { + verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask); + verbose(env, "%d: ", idx); + print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); +@@ -2662,7 +2662,7 @@ static int __mark_chain_precision(struct + DECLARE_BITMAP(mask, 64); + u32 history = st->jmp_history_cnt; + +- if (env->log.level & BPF_LOG_LEVEL) ++ if (env->log.level & BPF_LOG_LEVEL2) + verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx); + for (i = last_idx;;) { + if (skip_first) { +@@ -2749,7 +2749,7 @@ static int __mark_chain_precision(struct + new_marks = true; + reg->precise = true; + } +- if (env->log.level & BPF_LOG_LEVEL) { ++ if (env->log.level & BPF_LOG_LEVEL2) { + verbose(env, "parent %s regs=%x stack=%llx marks:", + new_marks ? 
"didn't have" : "already had", + reg_mask, stack_mask); diff --git a/patches.suse/PCI-Export-pci_dev_lock b/patches.suse/PCI-Export-pci_dev_lock new file mode 100644 index 0000000..55b16e0 --- /dev/null +++ b/patches.suse/PCI-Export-pci_dev_lock @@ -0,0 +1,50 @@ +From: Niklas Schnelle +Date: Wed, 5 May 2021 14:00:06 +0200 +Subject: PCI: Export pci_dev_lock() +Git-commit: dfd5bb23ad75bdabde89ac3166705a450bf16acb +Patch-mainline: v5.16-rc1 +References: jsc#PED-592 + +Commit e3a9b1212b9d ("PCI: Export pci_dev_trylock() and pci_dev_unlock()") +already exported pci_dev_trylock()/pci_dev_unlock() however in some +circumstances such as during error recovery it makes sense to block +waiting to get full access to the device so also export pci_dev_lock(). + +Link: https://lore.kernel.org/all/20210928181014.GA713179@bhelgaas/ +Acked-by: Pierre Morel +Acked-by: Bjorn Helgaas +Signed-off-by: Niklas Schnelle +Signed-off-by: Vasily Gorbik +Acked-by: Petr Tesarik +--- + drivers/pci/pci.c | 3 ++- + include/linux/pci.h | 1 + + 2 files changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/pci/pci.c ++++ b/drivers/pci/pci.c +@@ -5051,12 +5051,13 @@ static int pci_reset_bus_function(struct + return pci_parent_bus_reset(dev, probe); + } + +-static void pci_dev_lock(struct pci_dev *dev) ++void pci_dev_lock(struct pci_dev *dev) + { + pci_cfg_access_lock(dev); + /* block PM suspend, driver probe, etc. */ + device_lock(&dev->dev); + } ++EXPORT_SYMBOL_GPL(pci_dev_lock); + + /* Return 1 on successful lock, 0 on contention */ + int pci_dev_trylock(struct pci_dev *dev) +--- a/include/linux/pci.h ++++ b/include/linux/pci.h +@@ -1642,6 +1642,7 @@ void pci_cfg_access_lock(struct pci_dev + bool pci_cfg_access_trylock(struct pci_dev *dev); + void pci_cfg_access_unlock(struct pci_dev *dev); + ++void pci_dev_lock(struct pci_dev *dev); + int pci_dev_trylock(struct pci_dev *dev); + void pci_dev_unlock(struct pci_dev *dev); + diff --git a/patches.suse/PCI-IOV-Fix-wrong-kernel-doc-identifier.patch b/patches.suse/PCI-IOV-Fix-wrong-kernel-doc-identifier.patch new file mode 100644 index 0000000..0a28d4c --- /dev/null +++ b/patches.suse/PCI-IOV-Fix-wrong-kernel-doc-identifier.patch @@ -0,0 +1,45 @@ +From 8d26c4328b468e449df21314ef993eeaefc0306f Mon Sep 17 00:00:00 2001 +From: Leon Romanovsky +Date: Mon, 7 Mar 2022 13:33:25 +0200 +Subject: [PATCH] PCI/IOV: Fix wrong kernel-doc identifier +Git-commit: 8d26c4328b468e449df21314ef993eeaefc0306f +Patch-mainline: v5.18-rc1 +References: git-fixes + +Replace "-" to be ":" in comment section to be aligned with +kernel-doc format. 
+ +drivers/pci/iov.c:67: warning: Function parameter or member 'dev' not described in 'pci_iov_get_pf_drvdata' +drivers/pci/iov.c:67: warning: Function parameter or member 'pf_driver' not described in 'pci_iov_get_pf_drvdata' + +Fixes: a7e9f240c0da ("PCI/IOV: Add pci_iov_get_pf_drvdata() to allow VF reaching the drvdata of a PF") +Reported-by: Stephen Rothwell +Signed-off-by: Leon Romanovsky +Acked-by: Randy Dunlap +Acked-by: Bjorn Helgaas +Link: https://lore.kernel.org/r/8cecf7df45948a256dc56148cf9e87b2f2bb4198.1646652504.git.leonro@nvidia.com +Signed-off-by: Alex Williamson +Acked-by: Takashi Iwai + +--- + drivers/pci/iov.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c +index 28ec952e1221..952217572113 100644 +--- a/drivers/pci/iov.c ++++ b/drivers/pci/iov.c +@@ -49,8 +49,8 @@ EXPORT_SYMBOL_GPL(pci_iov_vf_id); + + /** + * pci_iov_get_pf_drvdata - Return the drvdata of a PF +- * @dev - VF pci_dev +- * @pf_driver - Device driver required to own the PF ++ * @dev: VF pci_dev ++ * @pf_driver: Device driver required to own the PF + * + * This must be called from a context that ensures that a VF driver is attached. + * The value returned is invalid once the VF driver completes its remove() +-- +2.35.3 + diff --git a/patches.suse/PM-runtime-Redefine-pm_runtime_release_supplier.patch b/patches.suse/PM-runtime-Redefine-pm_runtime_release_supplier.patch new file mode 100644 index 0000000..0b4ea03 --- /dev/null +++ b/patches.suse/PM-runtime-Redefine-pm_runtime_release_supplier.patch @@ -0,0 +1,110 @@ +From 07358194badf73e267289b40b761f5dc56928eab Mon Sep 17 00:00:00 2001 +From: "Rafael J. Wysocki" +Date: Mon, 27 Jun 2022 20:42:18 +0200 +Subject: [PATCH] PM: runtime: Redefine pm_runtime_release_supplier() +Git-commit: 07358194badf73e267289b40b761f5dc56928eab +References: git-fixes +Patch-mainline: v5.19-rc6 + +Instead of passing an extra bool argument to pm_runtime_release_supplier(), +make its callers take care of triggering a runtime-suspend of the +supplier device as needed. + +No expected functional impact. + +Suggested-by: Greg Kroah-Hartman +Signed-off-by: Rafael J. Wysocki +Reviewed-by: Greg Kroah-Hartman +Cc: 5.1+ # 5.1+ +Signed-off-by: Oliver Neukum +--- + drivers/base/core.c | 3 ++- + drivers/base/power/runtime.c | 20 +++++++++----------- + include/linux/pm_runtime.h | 5 ++--- + 3 files changed, 13 insertions(+), 15 deletions(-) + +--- a/drivers/base/core.c ++++ b/drivers/base/core.c +@@ -474,7 +474,8 @@ static void device_link_release_fn(struc + /* Ensure that all references to the link object have been dropped. */ + device_link_synchronize_removal(); + +- pm_runtime_release_supplier(link, true); ++ pm_runtime_release_supplier(link); ++ pm_request_idle(link->supplier); + + put_device(link->consumer); + put_device(link->supplier); +--- a/drivers/base/power/runtime.c ++++ b/drivers/base/power/runtime.c +@@ -308,13 +308,10 @@ static int rpm_get_suppliers(struct devi + /** + * pm_runtime_release_supplier - Drop references to device link's supplier. + * @link: Target device link. +- * @check_idle: Whether or not to check if the supplier device is idle. + * +- * Drop all runtime PM references associated with @link to its supplier device +- * and if @check_idle is set, check if that device is idle (and so it can be +- * suspended). ++ * Drop all runtime PM references associated with @link to its supplier device. 
+ */ +-void pm_runtime_release_supplier(struct device_link *link, bool check_idle) ++void pm_runtime_release_supplier(struct device_link *link) + { + struct device *supplier = link->supplier; + +@@ -327,9 +324,6 @@ void pm_runtime_release_supplier(struct + while (refcount_dec_not_one(&link->rpm_active) && + atomic_read(&supplier->power.usage_count) > 0) + pm_runtime_put_noidle(supplier); +- +- if (check_idle) +- pm_request_idle(supplier); + } + + static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend) +@@ -337,8 +331,11 @@ static void __rpm_put_suppliers(struct d + struct device_link *link; + + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, +- device_links_read_lock_held()) +- pm_runtime_release_supplier(link, try_to_suspend); ++ device_links_read_lock_held()) { ++ pm_runtime_release_supplier(link); ++ if (try_to_suspend) ++ pm_request_idle(link->supplier); ++ } + } + + static void rpm_put_suppliers(struct device *dev) +@@ -1781,7 +1778,8 @@ void pm_runtime_drop_link(struct device_ + return; + + pm_runtime_drop_link_count(link->consumer); +- pm_runtime_release_supplier(link, true); ++ pm_runtime_release_supplier(link); ++ pm_request_idle(link->supplier); + } + + static bool pm_runtime_need_not_resume(struct device *dev) +--- a/include/linux/pm_runtime.h ++++ b/include/linux/pm_runtime.h +@@ -58,7 +58,7 @@ extern void pm_runtime_get_suppliers(str + extern void pm_runtime_put_suppliers(struct device *dev); + extern void pm_runtime_new_link(struct device *dev); + extern void pm_runtime_drop_link(struct device_link *link); +-extern void pm_runtime_release_supplier(struct device_link *link, bool check_idle); ++extern void pm_runtime_release_supplier(struct device_link *link); + + /** + * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. +@@ -280,8 +280,7 @@ static inline void pm_runtime_get_suppli + static inline void pm_runtime_put_suppliers(struct device *dev) {} + static inline void pm_runtime_new_link(struct device *dev) {} + static inline void pm_runtime_drop_link(struct device_link *link) {} +-static inline void pm_runtime_release_supplier(struct device_link *link, +- bool check_idle) {} ++static inline void pm_runtime_release_supplier(struct device_link *link) {} + + #endif /* !CONFIG_PM */ + diff --git a/patches.suse/SUNRPC-Fix-svcxdr_init_decode-s-end-of-buffer-calcul.patch b/patches.suse/SUNRPC-Fix-svcxdr_init_decode-s-end-of-buffer-calcul.patch new file mode 100644 index 0000000..08993ef --- /dev/null +++ b/patches.suse/SUNRPC-Fix-svcxdr_init_decode-s-end-of-buffer-calcul.patch @@ -0,0 +1,65 @@ +From: Chuck Lever +Date: Thu, 1 Sep 2022 15:09:53 -0400 +Subject: [PATCH] SUNRPC: Fix svcxdr_init_decode's end-of-buffer calculation +Git-commit: 90bfc37b5ab91c1a6165e3e5cfc49bf04571b762 +Patch-mainline: v6.1 +References: bsc#1205128 CVE-2022-43945 + +Ensure that stream-based argument decoding can't go past the actual +end of the receive buffer. xdr_init_decode's calculation of the +value of xdr->end over-estimates the end of the buffer because the +Linux kernel RPC server code does not remove the size of the RPC +header from rqstp->rq_arg before calling the upper layer's +dispatcher. + +The server-side still uses the svc_getnl() macros to decode the +RPC call header. These macros reduce the length of the head iov +but do not update the total length of the message in the buffer +(buf->len). 
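
The stopgap the patch takes is simply to recompute the total from the
component lengths before handing the buffer to the xdr_stream. As a sketch
(the helper name is ours; the assignment mirrors the hunk below):

    /* svc_getnl()-style decoding shrinks head->iov_len as it consumes
     * words but never touches buf->len, so refresh it from the parts.
     */
    static void svcxdr_refresh_len_sketch(struct xdr_buf *buf)
    {
            buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len;
    }
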
+ +A proper fix for this would be to replace the use of svc_getnl() and +friends in the RPC header decoder, but that would be a large and +invasive change that would be difficult to backport. + +Fixes: 5191955d6fc6 ("SUNRPC: Prepare for xdr_stream-style decoding on the server-side") +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Acked-by: NeilBrown + +--- + include/linux/sunrpc/svc.h | 17 ++++++++++++++--- + 1 file changed, 14 insertions(+), 3 deletions(-) + +--- a/include/linux/sunrpc/svc.h ++++ b/include/linux/sunrpc/svc.h +@@ -567,16 +567,27 @@ static inline void svc_reserve_auth(stru + } + + /** +- * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding ++ * svcxdr_init_decode - Prepare an xdr_stream for Call decoding + * @rqstp: controlling server RPC transaction context + * ++ * This function currently assumes the RPC header in rq_arg has ++ * already been decoded. Upon return, xdr->p points to the ++ * location of the upper layer header. + */ + static inline void svcxdr_init_decode(struct svc_rqst *rqstp) + { + struct xdr_stream *xdr = &rqstp->rq_arg_stream; +- struct kvec *argv = rqstp->rq_arg.head; ++ struct xdr_buf *buf = &rqstp->rq_arg; ++ struct kvec *argv = buf->head; + +- xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL); ++ /* ++ * svc_getnl() and friends do not keep the xdr_buf's ::len ++ * field up to date. Refresh that field before initializing ++ * the argument decoding stream. ++ */ ++ buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len; ++ ++ xdr_init_decode(xdr, buf, argv->iov_base, NULL); + xdr_set_scratch_page(xdr, rqstp->rq_scratch_page); + } + diff --git a/patches.suse/SUNRPC-Fix-svcxdr_init_encode-s-buflen-calculation.patch b/patches.suse/SUNRPC-Fix-svcxdr_init_encode-s-buflen-calculation.patch new file mode 100644 index 0000000..6df86d4 --- /dev/null +++ b/patches.suse/SUNRPC-Fix-svcxdr_init_encode-s-buflen-calculation.patch @@ -0,0 +1,36 @@ +From: Chuck Lever +Date: Thu, 1 Sep 2022 15:09:59 -0400 +Subject: [PATCH] SUNRPC: Fix svcxdr_init_encode's buflen calculation +Git-commit: 1242a87da0d8cd2a428e96ca68e7ea899b0f4624 +Patch-mainline: v6.1 +References: bsc#1205128 CVE-2022-43945 + +Commit 2825a7f90753 ("nfsd4: allow encoding across page boundaries") +added an explicit computation of the remaining length in the rq_res +XDR buffer. + +The computation appears to suffer from an "off-by-one" bug. Because +buflen is too large by one page, XDR encoding can run off the end of +the send buffer by eventually trying to use the struct page address +in rq_page_end, which always contains NULL. 
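
A worked example of the off-by-one, with an illustrative page count of
four usable reply pages:

    /* rq_pages:  [p0] [p1] [p2] [p3] [NULL]
     *             ^ buf->pages        ^ rqstp->rq_page_end
     *
     * old:  PAGE_SIZE * (1 + rq_page_end - buf->pages) = 5 pages,
     *       so encoding could eventually reach the NULL entry
     * new:  PAGE_SIZE * (rq_page_end - buf->pages)     = 4 pages
     * (both are then reduced by rq_auth_slack, as in the hunk below)
     */
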
+ +Fixes: bddfdbcddbe2 ("NFSD: Extract the svcxdr_init_encode() helper") +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Acked-by: NeilBrown + +--- + include/linux/sunrpc/svc.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/linux/sunrpc/svc.h ++++ b/include/linux/sunrpc/svc.h +@@ -610,7 +610,7 @@ static inline void svcxdr_init_encode(st + xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; + buf->len = resv->iov_len; + xdr->page_ptr = buf->pages - 1; +- buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages); ++ buf->buflen = PAGE_SIZE * (rqstp->rq_page_end - buf->pages); + buf->buflen -= rqstp->rq_auth_slack; + xdr->rqst = NULL; + } diff --git a/patches.suse/USB-gadget-Fix-return-of-EBUSY.patch b/patches.suse/USB-gadget-Fix-return-of-EBUSY.patch new file mode 100644 index 0000000..5e1f48a --- /dev/null +++ b/patches.suse/USB-gadget-Fix-return-of-EBUSY.patch @@ -0,0 +1,46 @@ +From d7c90d9f9a5b6a85c09d37c5616d880d849a0c8b Mon Sep 17 00:00:00 2001 +From: Colin Ian King +Date: Wed, 4 May 2022 14:58:40 +0100 +Subject: [PATCH] USB: gadget: Fix return of -EBUSY +Git-commit: d7c90d9f9a5b6a85c09d37c5616d880d849a0c8b +Patch-mainline: v5.19-rc1 +References: git-fixes + +Currently when driver->match_existing_only is true, the error return is +set to -EBUSY however ret is then set to 0 at the end of the if/else +statement. I believe the ret = 0 statement should be set in the else +part of the if statement and not at the end to ensure -EBUSY is being +returned correctly. + +Detected by clang scan: +drivers/usb/gadget/udc/core.c:1558:4: warning: Value stored to 'ret' is +never read [deadcode.DeadStores] + +Fixes: fc274c1e9973 ("USB: gadget: Add a new bus for gadgets") +Acked-by: Alan Stern +Signed-off-by: Colin Ian King +Link: https://lore.kernel.org/r/20220504135840.232209-1-colin.i.king@gmail.com +Signed-off-by: Greg Kroah-Hartman +Acked-by: Takashi Iwai + +--- + drivers/usb/gadget/udc/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c +index 61790592b2c8..3281d8a3dae7 100644 +--- a/drivers/usb/gadget/udc/core.c ++++ b/drivers/usb/gadget/udc/core.c +@@ -1559,8 +1559,8 @@ int usb_gadget_register_driver_owner(struct usb_gadget_driver *driver, + } else { + pr_info("%s: couldn't find an available UDC\n", + driver->function); ++ ret = 0; + } +- ret = 0; + } + mutex_unlock(&udc_lock); + +-- +2.35.3 + diff --git a/patches.suse/VDUSE-fix-documentation-underline-warning.patch b/patches.suse/VDUSE-fix-documentation-underline-warning.patch new file mode 100644 index 0000000..16bf375 --- /dev/null +++ b/patches.suse/VDUSE-fix-documentation-underline-warning.patch @@ -0,0 +1,46 @@ +From 09b6addf64860bf7b0e3dddf03229c8c20eda4b5 Mon Sep 17 00:00:00 2001 +From: Randy Dunlap +Date: Wed, 6 Oct 2021 13:29:04 -0700 +Subject: [PATCH] VDUSE: fix documentation underline warning +Git-commit: 09b6addf64860bf7b0e3dddf03229c8c20eda4b5 +Patch-mainline: v5.15-rc6 +References: git-fixes + +Fix a VDUSE documentation build warning: + +Documentation/userspace-api/vduse.rst:21: WARNING: Title underline too short. + +Fixes: 7bc7f61897b6 ("Documentation: Add documentation for VDUSE") +Signed-off-by: Randy Dunlap +Cc: Xie Yongji +Cc: Jason Wang +Cc: Michael S. Tsirkin +Cc: virtualization@lists.linux-foundation.org +Cc: Jonathan Corbet +Cc: linux-doc@vger.kernel.org +Link: https://lore.kernel.org/r/20211006202904.30241-1-rdunlap@infradead.org +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Xie Yongji +Acked-by: Jason Wang +Acked-by: Takashi Iwai + +--- + Documentation/userspace-api/vduse.rst | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Documentation/userspace-api/vduse.rst b/Documentation/userspace-api/vduse.rst +index 42ef59ea5314..bdb880e01132 100644 +--- a/Documentation/userspace-api/vduse.rst ++++ b/Documentation/userspace-api/vduse.rst +@@ -18,7 +18,7 @@ types can be added after the security issue of corresponding device driver + is clarified or fixed in the future. + + Create/Destroy VDUSE devices +------------------------- ++---------------------------- + + VDUSE devices are created as follows: + +-- +2.35.3 + diff --git a/patches.suse/add-includes-masked-by-cgroup-bpf-dependency.patch b/patches.suse/add-includes-masked-by-cgroup-bpf-dependency.patch new file mode 100644 index 0000000..0d97ca3 --- /dev/null +++ b/patches.suse/add-includes-masked-by-cgroup-bpf-dependency.patch @@ -0,0 +1,40 @@ +From: Jakub Kicinski +Date: Wed, 15 Dec 2021 18:55:36 -0800 +Subject: add includes masked by cgroup -> bpf dependency +Patch-mainline: v5.17-rc1 +Git-commit: f7ea534a0920dbaf71a8003936e178e14ec9271d +References: jsc#PED-1368 + +cgroup pulls in BPF which pulls in a lot of includes. +We're about to break that chain so fix those who were +depending on it. + +Signed-off-by: Jakub Kicinski +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211216025538.1649516-2-kuba@kernel.org +Acked-by: Shung-Hsi Yu +--- + arch/s390/mm/hugetlbpage.c | 1 + + include/linux/perf_event.h | 1 + + 2 files changed, 2 insertions(+) + +--- a/arch/s390/mm/hugetlbpage.c ++++ b/arch/s390/mm/hugetlbpage.c +@@ -9,6 +9,7 @@ + #define KMSG_COMPONENT "hugetlb" + #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + ++#include + #include + #include + #include +--- a/include/linux/perf_event.h ++++ b/include/linux/perf_event.h +@@ -611,6 +611,7 @@ struct swevent_hlist { + #define PERF_ATTACH_SCHED_CB 0x20 + #define PERF_ATTACH_CHILD 0x40 + ++struct bpf_prog; + struct perf_cgroup; + struct perf_buffer; + diff --git a/patches.suse/add-missing-bpf-cgroup.h-includes.patch b/patches.suse/add-missing-bpf-cgroup.h-includes.patch new file mode 100644 index 0000000..ce6aabb --- /dev/null +++ b/patches.suse/add-missing-bpf-cgroup.h-includes.patch @@ -0,0 +1,129 @@ +From: Jakub Kicinski +Date: Wed, 15 Dec 2021 18:55:37 -0800 +Subject: add missing bpf-cgroup.h includes +Patch-mainline: v5.17-rc1 +Git-commit: aef2feda97b840ec38e9fa53d0065188453304e8 +References: jsc#PED-1368 + +We're about to break the cgroup-defs.h -> bpf-cgroup.h dependency, +make sure those who actually need more than the definition of +struct cgroup_bpf include bpf-cgroup.h explicitly. 
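
The pattern, sketched on an invented pair of files (neither is from the
patch): a header that only stores a pointer can rely on a forward
declaration, while code that actually calls into cgroup-BPF must include
the header itself:

    /* holder.h -- stores a pointer only, a forward declaration suffices */
    struct bpf_prog;
    struct prog_holder {
            struct bpf_prog *prog;
    };

    /* user.c -- dereferences the type and calls helpers, so it must pull
     * in the real header instead of inheriting it via cgroup-defs.h */
    #include <linux/bpf-cgroup.h>
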
+ +Signed-off-by: Jakub Kicinski +Signed-off-by: Alexei Starovoitov +Acked-by: Tejun Heo +Link: https://lore.kernel.org/bpf/20211216025538.1649516-3-kuba@kernel.org +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/helpers.c | 1 + + kernel/bpf/syscall.c | 1 + + kernel/bpf/verifier.c | 1 + + kernel/cgroup/cgroup.c | 1 + + kernel/trace/trace_kprobe.c | 1 + + kernel/trace/trace_uprobe.c | 1 + + net/ipv4/udp.c | 1 + + net/ipv6/udp.c | 1 + + net/socket.c | 1 + + security/device_cgroup.c | 1 + + 10 files changed, 10 insertions(+) + +--- a/kernel/bpf/helpers.c ++++ b/kernel/bpf/helpers.c +@@ -2,6 +2,7 @@ + /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + */ + #include ++#include + #include + #include + #include +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -2,6 +2,7 @@ + /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + */ + #include ++#include + #include + #include + #include +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -4,6 +4,7 @@ + * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io + */ + #include ++#include + #include + #include + #include +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -30,6 +30,7 @@ + + #include "cgroup-internal.h" + ++#include + #include + #include + #include +--- a/kernel/trace/trace_kprobe.c ++++ b/kernel/trace/trace_kprobe.c +@@ -7,6 +7,7 @@ + */ + #define pr_fmt(fmt) "trace_kprobe: " fmt + ++#include + #include + #include + #include +--- a/kernel/trace/trace_uprobe.c ++++ b/kernel/trace/trace_uprobe.c +@@ -7,6 +7,7 @@ + */ + #define pr_fmt(fmt) "trace_uprobe: " fmt + ++#include + #include + #include + #include +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -74,6 +74,7 @@ + + #define pr_fmt(fmt) "UDP: " fmt + ++#include + #include + #include + #include +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -17,6 +17,7 @@ + * YOSHIFUJI Hideaki @USAGI: convert /proc/net/udp6 to seq_file. + */ + ++#include + #include + #include + #include +--- a/net/socket.c ++++ b/net/socket.c +@@ -52,6 +52,7 @@ + * Based upon Swansea University Computer Society NET3.039 + */ + ++#include + #include + #include + #include +--- a/security/device_cgroup.c ++++ b/security/device_cgroup.c +@@ -5,6 +5,7 @@ + * Copyright 2007 IBM Corp + */ + ++#include + #include + #include + #include diff --git a/patches.suse/affs-use-bdev_nr_sectors-instead-of-open-coding-it.patch b/patches.suse/affs-use-bdev_nr_sectors-instead-of-open-coding-it.patch new file mode 100644 index 0000000..b1f987e --- /dev/null +++ b/patches.suse/affs-use-bdev_nr_sectors-instead-of-open-coding-it.patch @@ -0,0 +1,35 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:11 +0200 +Subject: [PATCH] affs: use bdev_nr_sectors instead of open coding it +Git-commit: 589aa7bc40c4f823dd6094cef51f8cff60e26e95 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Reviewed-by: Chaitanya Kulkarni +Link: https://lore.kernel.org/r/20211018101130.1838532-12-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/affs/super.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/affs/super.c b/fs/affs/super.c +index c6c2a513ec92..c609005a9eaa 100644 +--- a/fs/affs/super.c ++++ b/fs/affs/super.c +@@ -389,7 +389,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) + * blocks, we will have to change it. 
+ */ + +- size = i_size_read(sb->s_bdev->bd_inode) >> 9; ++ size = bdev_nr_sectors(sb->s_bdev); + pr_debug("initial blocksize=%d, #blocks=%d\n", 512, size); + + affs_set_blocksize(sb, PAGE_SIZE); +-- +2.35.3 + diff --git a/patches.suse/amiflop-add-error-handling-support-for-add_disk.patch b/patches.suse/amiflop-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..78c3984 --- /dev/null +++ b/patches.suse/amiflop-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,48 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:02:54 -0700 +Subject: [PATCH] amiflop: add error handling support for add_disk() +Git-commit: a2379420c7d7cb14a8b214fc7c0e2f55f66393ac +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. The caller for fd_alloc_disk() deals with +the rest of the cleanup like the tag. + +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20210927220302.1073499-7-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/amiflop.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c +index 2909fd9e72fb..bf5c124c5452 100644 +--- a/drivers/block/amiflop.c ++++ b/drivers/block/amiflop.c +@@ -1780,6 +1780,7 @@ static const struct blk_mq_ops amiflop_mq_ops = { + static int fd_alloc_disk(int drive, int system) + { + struct gendisk *disk; ++ int err; + + disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL); + if (IS_ERR(disk)) +@@ -1798,8 +1799,10 @@ static int fd_alloc_disk(int drive, int system) + set_capacity(disk, 880 * 2); + + unit[drive].gendisk[system] = disk; +- add_disk(disk); +- return 0; ++ err = add_disk(disk); ++ if (err) ++ blk_cleanup_disk(disk); ++ return err; + } + + static int fd_alloc_drive(int drive) +-- +2.35.3 + diff --git a/patches.suse/aoe-add-error-handling-support-for-add_disk.patch b/patches.suse/aoe-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..ae87fcf --- /dev/null +++ b/patches.suse/aoe-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,45 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:00:57 -0700 +Subject: [PATCH] aoe: add error handling support for add_disk() +Git-commit: d9c2bd252a4578419e0b863bfe3dd97b858ccd8e +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. 
+ +Signed-off-by: Luis Chamberlain +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/aoe/aoeblk.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c +index 06b360f7123a..e436b0e8eff5 100644 +--- a/drivers/block/aoe/aoeblk.c ++++ b/drivers/block/aoe/aoeblk.c +@@ -417,7 +417,9 @@ aoeblk_gdalloc(void *vp) + + spin_unlock_irqrestore(&d->lock, flags); + +- device_add_disk(NULL, gd, aoe_attr_groups); ++ err = device_add_disk(NULL, gd, aoe_attr_groups); ++ if (err) ++ goto out_disk_cleanup; + aoedisk_add_debugfs(d); + + spin_lock_irqsave(&d->lock, flags); +@@ -426,6 +428,8 @@ aoeblk_gdalloc(void *vp) + spin_unlock_irqrestore(&d->lock, flags); + return; + ++out_disk_cleanup: ++ blk_cleanup_disk(gd); + err_tagset: + blk_mq_free_tag_set(set); + err_mempool: +-- +2.35.3 + diff --git a/patches.suse/arch-Remove-leftovers-from-prism54-wireless-driver.patch b/patches.suse/arch-Remove-leftovers-from-prism54-wireless-driver.patch new file mode 100644 index 0000000..d11e46d --- /dev/null +++ b/patches.suse/arch-Remove-leftovers-from-prism54-wireless-driver.patch @@ -0,0 +1,87 @@ +From e0cb56546d39956cd6c42e690548cafc97e50896 Mon Sep 17 00:00:00 2001 +From: Alexandre Ghiti +Date: Thu, 16 Dec 2021 10:44:26 +0100 +Subject: [PATCH] arch: Remove leftovers from prism54 wireless driver +Git-commit: e0cb56546d39956cd6c42e690548cafc97e50896 +Patch-mainline: v5.17-rc1 +References: git-fixes + +This driver was removed so remove all references to it. + +Fixes: d249ff28b1d8 ("intersil: remove obsolete prism54 wireless driver") +Signed-off-by: Alexandre Ghiti +Acked-by: Thomas Bogendoerfer +Signed-off-by: Arnd Bergmann +Acked-by: Takashi Iwai + +--- + arch/mips/configs/ip27_defconfig | 1 - + arch/mips/configs/malta_defconfig | 1 - + arch/mips/configs/malta_kvm_defconfig | 1 - + arch/mips/configs/maltaup_xpa_defconfig | 1 - + arch/powerpc/configs/pmac32_defconfig | 1 - + 5 files changed, 5 deletions(-) + +diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig +index 638d7cf5ef01..821630ac1be7 100644 +--- a/arch/mips/configs/ip27_defconfig ++++ b/arch/mips/configs/ip27_defconfig +@@ -223,7 +223,6 @@ CONFIG_TMD_HERMES=m + CONFIG_NORTEL_HERMES=m + CONFIG_P54_COMMON=m + CONFIG_P54_PCI=m +-CONFIG_PRISM54=m + CONFIG_LIBERTAS=m + CONFIG_LIBERTAS_THINFIRM=m + CONFIG_MWL8K=m +diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig +index 9cb2cf2595e0..3321bb576944 100644 +--- a/arch/mips/configs/malta_defconfig ++++ b/arch/mips/configs/malta_defconfig +@@ -302,7 +302,6 @@ CONFIG_HOSTAP_FIRMWARE=y + CONFIG_HOSTAP_FIRMWARE_NVRAM=y + CONFIG_HOSTAP_PLX=m + CONFIG_HOSTAP_PCI=m +-CONFIG_PRISM54=m + CONFIG_LIBERTAS=m + CONFIG_INPUT_MOUSEDEV=y + CONFIG_MOUSE_PS2_ELANTECH=y +diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig +index 5924e48fd3ec..009b30372226 100644 +--- a/arch/mips/configs/malta_kvm_defconfig ++++ b/arch/mips/configs/malta_kvm_defconfig +@@ -310,7 +310,6 @@ CONFIG_HOSTAP_FIRMWARE=y + CONFIG_HOSTAP_FIRMWARE_NVRAM=y + CONFIG_HOSTAP_PLX=m + CONFIG_HOSTAP_PCI=m +-CONFIG_PRISM54=m + CONFIG_LIBERTAS=m + CONFIG_INPUT_MOUSEDEV=y + CONFIG_SERIAL_8250=y +diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig +index c0d3156ef640..e214e136101c 100644 +--- a/arch/mips/configs/maltaup_xpa_defconfig ++++ b/arch/mips/configs/maltaup_xpa_defconfig +@@ -309,7 +309,6 @@ CONFIG_HOSTAP_FIRMWARE=y + 
CONFIG_HOSTAP_FIRMWARE_NVRAM=y + CONFIG_HOSTAP_PLX=m + CONFIG_HOSTAP_PCI=m +-CONFIG_PRISM54=m + CONFIG_LIBERTAS=m + CONFIG_INPUT_MOUSEDEV=y + CONFIG_MOUSE_PS2_ELANTECH=y +diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig +index 7aefac5afab0..13885ec563d1 100644 +--- a/arch/powerpc/configs/pmac32_defconfig ++++ b/arch/powerpc/configs/pmac32_defconfig +@@ -169,7 +169,6 @@ CONFIG_USB_USBNET=m + CONFIG_B43=m + CONFIG_B43LEGACY=m + CONFIG_P54_COMMON=m +-CONFIG_PRISM54=m + CONFIG_INPUT_EVDEV=y + # CONFIG_KEYBOARD_ATKBD is not set + # CONFIG_MOUSE_PS2 is not set +-- +2.35.3 + diff --git a/patches.suse/ataflop-remove-ataflop_probe_lock-mutex.patch b/patches.suse/ataflop-remove-ataflop_probe_lock-mutex.patch index 2d15bdc..f8af578 100644 --- a/patches.suse/ataflop-remove-ataflop_probe_lock-mutex.patch +++ b/patches.suse/ataflop-remove-ataflop_probe_lock-mutex.patch @@ -1,4 +1,3 @@ -From 267a022590e3754481df650e4c616281d113651f Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 3 Nov 2021 16:04:33 -0700 Subject: [PATCH] ataflop: remove ataflop_probe_lock mutex @@ -6,8 +5,6 @@ Git-commit: 4ddb85d36613c45bde00d368bf9f357bd0708a0c Patch-mainline: v5.16-rc1 References: stable-5.14.19 -[ Upstream commit 4ddb85d36613c45bde00d368bf9f357bd0708a0c ] - Commit bf9c0538e485b591 ("ataflop: use a separate gendisk for each media format") introduced ataflop_probe_lock mutex, but forgot to unlock the mutex when atari_floppy_init() (i.e. module loading) succeeded. This will @@ -37,15 +34,13 @@ Reviewed-by: Luis Chamberlain Tested-by: Michael Schmitz Link: https://lore.kernel.org/r/20211103230437.1639990-11-mcgrof@kernel.org Signed-off-by: Jens Axboe -Signed-off-by: Sasha Levin Acked-by: Takashi Iwai - --- drivers/block/ataflop.c | 47 +++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c -index 123ad5819309..aab48b292a3b 100644 +index d14bdc3589b2..170dd193cef6 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -2008,8 +2008,6 @@ static int ataflop_alloc_disk(unsigned int drive, unsigned int type) @@ -104,7 +99,7 @@ index 123ad5819309..aab48b292a3b 100644 for (i = 0; i < FD_MAX_UNITS; i++) { memset(&unit[i].tag_set, 0, sizeof(unit[i].tag_set)); unit[i].tag_set.ops = &ataflop_mq_ops; -@@ -2111,15 +2122,17 @@ static int __init atari_floppy_init (void) +@@ -2113,7 +2124,12 @@ static int __init atari_floppy_init (void) UseTrackbuffer ? 
"" : "no "); config_types(); @@ -116,7 +111,9 @@ index 123ad5819309..aab48b292a3b 100644 + } + return ret; - err: + err_out_dma: + atari_stram_free(DMABuffer); +@@ -2121,9 +2137,6 @@ static int __init atari_floppy_init (void) while (--i >= 0) atari_cleanup_floppy_disk(&unit[i]); @@ -126,7 +123,7 @@ index 123ad5819309..aab48b292a3b 100644 return ret; } -@@ -2164,14 +2177,8 @@ __setup("floppy=", atari_floppy_setup); +@@ -2168,14 +2181,8 @@ __setup("floppy=", atari_floppy_setup); static void __exit atari_floppy_exit(void) { @@ -143,5 +140,5 @@ index 123ad5819309..aab48b292a3b 100644 module_init(atari_floppy_init) -- -2.26.2 +2.35.3 diff --git a/patches.suse/ath10k-abstract-htt_rx_desc-structure.patch b/patches.suse/ath10k-abstract-htt_rx_desc-structure.patch new file mode 100644 index 0000000..3484b8f --- /dev/null +++ b/patches.suse/ath10k-abstract-htt_rx_desc-structure.patch @@ -0,0 +1,1777 @@ +From 6bae9de622d3ef4805aba40e763eb4b0975c4f6d Mon Sep 17 00:00:00 2001 +From: Francesco Magliocca +Date: Wed, 12 Jan 2022 10:15:11 +0200 +Subject: [PATCH] ath10k: abstract htt_rx_desc structure +Git-commit: 6bae9de622d3ef4805aba40e763eb4b0975c4f6d +Patch-mainline: v5.18-rc1 +References: git-fixes + +QCA6174 card often hangs with the current htt_rx_desc +memory layout in some circumstances, because its firmware +fails to handle length differences. +Therefore we must abstract the htt_rx_desc structure +and operations on it, to allow different wireless cards +to use different, unrelated rx descriptor structures. + +Define a base htt_rx_desc structure and htt_rx_desc_v1 +for use with the QCA family of ath10k supported cards +and htt_rx_desc_v2 for use with the WCN3990 card. + +Define htt_rx_desc_ops which contains the abstract operations +to access the generic htt_rx_desc, give implementations +for each card and update htt_rx.c to use the defined +abstract interface to rx descriptors. 
+ +Fixes: e3def6f7ddf8 ("ath10k: Update rx descriptor for WCN3990 target") + +Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00157-QCARMSWPZ-1 + +Co-developed-by: Enrico Lumetti +Signed-off-by: Enrico Lumetti +Signed-off-by: Francesco Magliocca +Link: https://lore.kernel.org/ath10k/CAH4F6usFu8-A6k5Z7rU9__iENcSC6Zr-NtRhh_aypR74UvN1uQ@mail.gmail.com/ +Signed-off-by: Kalle Valo +Link: https://lore.kernel.org/r/20211216151823.68878-1-franciman12@gmail.com +Acked-by: Takashi Iwai + +--- + drivers/net/wireless/ath/ath10k/core.c | 16 ++ + drivers/net/wireless/ath/ath10k/htt.c | 153 ++++++++++ + drivers/net/wireless/ath/ath10k/htt.h | 296 +++++++++++++++++-- + drivers/net/wireless/ath/ath10k/htt_rx.c | 331 +++++++++++++++------- + drivers/net/wireless/ath/ath10k/htt_tx.c | 36 +-- + drivers/net/wireless/ath/ath10k/hw.c | 15 +- + drivers/net/wireless/ath/ath10k/hw.h | 27 +- + drivers/net/wireless/ath/ath10k/rx_desc.h | 40 ++- + 8 files changed, 722 insertions(+), 192 deletions(-) + +diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c +index 8f5b8eb368fa..9e1f483e1362 100644 +--- a/drivers/net/wireless/ath/ath10k/core.c ++++ b/drivers/net/wireless/ath/ath10k/core.c +@@ -75,6 +75,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .board_size = QCA988X_BOARD_DATA_SZ, + .board_ext_size = QCA988X_BOARD_EXT_DATA_SZ, + }, ++ .rx_desc_ops = &qca988x_rx_desc_ops, + .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, + .spectral_bin_discard = 0, +@@ -111,6 +112,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .board_size = QCA988X_BOARD_DATA_SZ, + .board_ext_size = QCA988X_BOARD_EXT_DATA_SZ, + }, ++ .rx_desc_ops = &qca988x_rx_desc_ops, + .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, + .spectral_bin_discard = 0, +@@ -148,6 +150,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .board_size = QCA9887_BOARD_DATA_SZ, + .board_ext_size = QCA9887_BOARD_EXT_DATA_SZ, + }, ++ .rx_desc_ops = &qca988x_rx_desc_ops, + .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, + .spectral_bin_discard = 0, +@@ -184,6 +187,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .board_size = QCA6174_BOARD_DATA_SZ, + .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, + }, ++ .rx_desc_ops = &qca988x_rx_desc_ops, + .hw_ops = &qca6174_sdio_ops, + .hw_clk = qca6174_clk, + .target_cpu_freq = 176000000, +@@ -216,6 +220,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .board_size = QCA6174_BOARD_DATA_SZ, + .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, + }, ++ .rx_desc_ops = &qca988x_rx_desc_ops, + .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, + .spectral_bin_discard = 0, +@@ -252,6 +257,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .board_size = QCA6174_BOARD_DATA_SZ, + .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, + }, ++ .rx_desc_ops = &qca988x_rx_desc_ops, + .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, + .spectral_bin_discard = 0, +@@ -288,6 +294,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .board_size = QCA6174_BOARD_DATA_SZ, + .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, + }, ++ .rx_desc_ops = &qca988x_rx_desc_ops, + .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, + .spectral_bin_discard = 0, +@@ -325,6 +332,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .board_size = QCA6174_BOARD_DATA_SZ, + .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, + }, ++ .rx_desc_ops = &qca988x_rx_desc_ops, + .hw_ops = &qca6174_ops, + .hw_clk 
= qca6174_clk, + .target_cpu_freq = 176000000, +@@ -370,6 +378,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, + }, + .sw_decrypt_mcast_mgmt = true, ++ .rx_desc_ops = &qca99x0_rx_desc_ops, + .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, + .spectral_bin_discard = 4, +@@ -415,6 +424,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .ext_board_size = QCA99X0_EXT_BOARD_DATA_SZ, + }, + .sw_decrypt_mcast_mgmt = true, ++ .rx_desc_ops = &qca99x0_rx_desc_ops, + .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, + .spectral_bin_discard = 12, +@@ -461,6 +471,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, + }, + .sw_decrypt_mcast_mgmt = true, ++ .rx_desc_ops = &qca99x0_rx_desc_ops, + .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, + .spectral_bin_discard = 12, +@@ -501,6 +512,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .board_size = QCA9377_BOARD_DATA_SZ, + .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, + }, ++ .rx_desc_ops = &qca988x_rx_desc_ops, + .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, + .spectral_bin_discard = 0, +@@ -537,6 +549,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .board_size = QCA9377_BOARD_DATA_SZ, + .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, + }, ++ .rx_desc_ops = &qca988x_rx_desc_ops, + .hw_ops = &qca6174_ops, + .hw_clk = qca6174_clk, + .target_cpu_freq = 176000000, +@@ -575,6 +588,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .board_size = QCA9377_BOARD_DATA_SZ, + .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, + }, ++ .rx_desc_ops = &qca988x_rx_desc_ops, + .hw_ops = &qca6174_ops, + .hw_clk = qca6174_clk, + .target_cpu_freq = 176000000, +@@ -611,6 +625,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .board_ext_size = QCA4019_BOARD_EXT_DATA_SZ, + }, + .sw_decrypt_mcast_mgmt = true, ++ .rx_desc_ops = &qca99x0_rx_desc_ops, + .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, + .spectral_bin_discard = 4, +@@ -643,6 +658,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { + .dir = WCN3990_HW_1_0_FW_DIR, + }, + .sw_decrypt_mcast_mgmt = true, ++ .rx_desc_ops = &wcn3990_rx_desc_ops, + .hw_ops = &wcn3990_ops, + .decap_align_bytes = 1, + .num_peers = TARGET_HL_TLV_NUM_PEERS, +diff --git a/drivers/net/wireless/ath/ath10k/htt.c b/drivers/net/wireless/ath/ath10k/htt.c +index 127b4e4980ef..907e1e13871a 100644 +--- a/drivers/net/wireless/ath/ath10k/htt.c ++++ b/drivers/net/wireless/ath/ath10k/htt.c +@@ -131,6 +131,159 @@ static const enum htt_t2h_msg_type htt_10_4_t2h_msg_types[] = { + HTT_T2H_MSG_TYPE_PEER_STATS, + }; + ++const struct ath10k_htt_rx_desc_ops qca988x_rx_desc_ops = { ++ .rx_desc_size = sizeof(struct htt_rx_desc_v1), ++ .rx_desc_msdu_payload_offset = offsetof(struct htt_rx_desc_v1, msdu_payload) ++}; ++ ++static int ath10k_qca99x0_rx_desc_get_l3_pad_bytes(struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v1 *rx_desc = container_of(rxd, ++ struct htt_rx_desc_v1, ++ base); ++ ++ return MS(__le32_to_cpu(rx_desc->msdu_end.qca99x0.info1), ++ RX_MSDU_END_INFO1_L3_HDR_PAD); ++} ++ ++static bool ath10k_qca99x0_rx_desc_msdu_limit_error(struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v1 *rx_desc = container_of(rxd, ++ struct htt_rx_desc_v1, ++ base); ++ ++ return !!(rx_desc->msdu_end.common.info0 & ++ __cpu_to_le32(RX_MSDU_END_INFO0_MSDU_LIMIT_ERR)); ++} ++ ++const struct ath10k_htt_rx_desc_ops 
qca99x0_rx_desc_ops = { ++ .rx_desc_size = sizeof(struct htt_rx_desc_v1), ++ .rx_desc_msdu_payload_offset = offsetof(struct htt_rx_desc_v1, msdu_payload), ++ ++ .rx_desc_get_l3_pad_bytes = ath10k_qca99x0_rx_desc_get_l3_pad_bytes, ++ .rx_desc_get_msdu_limit_error = ath10k_qca99x0_rx_desc_msdu_limit_error, ++}; ++ ++static void ath10k_rx_desc_wcn3990_get_offsets(struct htt_rx_ring_rx_desc_offsets *off) ++{ ++#define desc_offset(x) (offsetof(struct htt_rx_desc_v2, x) / 4) ++ off->mac80211_hdr_offset = __cpu_to_le16(desc_offset(rx_hdr_status)); ++ off->msdu_payload_offset = __cpu_to_le16(desc_offset(msdu_payload)); ++ off->ppdu_start_offset = __cpu_to_le16(desc_offset(ppdu_start)); ++ off->ppdu_end_offset = __cpu_to_le16(desc_offset(ppdu_end)); ++ off->mpdu_start_offset = __cpu_to_le16(desc_offset(mpdu_start)); ++ off->mpdu_end_offset = __cpu_to_le16(desc_offset(mpdu_end)); ++ off->msdu_start_offset = __cpu_to_le16(desc_offset(msdu_start)); ++ off->msdu_end_offset = __cpu_to_le16(desc_offset(msdu_end)); ++ off->rx_attention_offset = __cpu_to_le16(desc_offset(attention)); ++ off->frag_info_offset = __cpu_to_le16(desc_offset(frag_info)); ++#undef desc_offset ++} ++ ++static struct htt_rx_desc * ++ath10k_rx_desc_wcn3990_from_raw_buffer(void *buff) ++{ ++ return &((struct htt_rx_desc_v2 *)buff)->base; ++} ++ ++static struct rx_attention * ++ath10k_rx_desc_wcn3990_get_attention(struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v2 *rx_desc = container_of(rxd, struct htt_rx_desc_v2, base); ++ ++ return &rx_desc->attention; ++} ++ ++static struct rx_frag_info_common * ++ath10k_rx_desc_wcn3990_get_frag_info(struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v2 *rx_desc = container_of(rxd, struct htt_rx_desc_v2, base); ++ ++ return &rx_desc->frag_info.common; ++} ++ ++static struct rx_mpdu_start * ++ath10k_rx_desc_wcn3990_get_mpdu_start(struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v2 *rx_desc = container_of(rxd, struct htt_rx_desc_v2, base); ++ ++ return &rx_desc->mpdu_start; ++} ++ ++static struct rx_mpdu_end * ++ath10k_rx_desc_wcn3990_get_mpdu_end(struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v2 *rx_desc = container_of(rxd, struct htt_rx_desc_v2, base); ++ ++ return &rx_desc->mpdu_end; ++} ++ ++static struct rx_msdu_start_common * ++ath10k_rx_desc_wcn3990_get_msdu_start(struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v2 *rx_desc = container_of(rxd, struct htt_rx_desc_v2, base); ++ ++ return &rx_desc->msdu_start.common; ++} ++ ++static struct rx_msdu_end_common * ++ath10k_rx_desc_wcn3990_get_msdu_end(struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v2 *rx_desc = container_of(rxd, struct htt_rx_desc_v2, base); ++ ++ return &rx_desc->msdu_end.common; ++} ++ ++static struct rx_ppdu_start * ++ath10k_rx_desc_wcn3990_get_ppdu_start(struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v2 *rx_desc = container_of(rxd, struct htt_rx_desc_v2, base); ++ ++ return &rx_desc->ppdu_start; ++} ++ ++static struct rx_ppdu_end_common * ++ath10k_rx_desc_wcn3990_get_ppdu_end(struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v2 *rx_desc = container_of(rxd, struct htt_rx_desc_v2, base); ++ ++ return &rx_desc->ppdu_end.common; ++} ++ ++static u8 * ++ath10k_rx_desc_wcn3990_get_rx_hdr_status(struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v2 *rx_desc = container_of(rxd, struct htt_rx_desc_v2, base); ++ ++ return rx_desc->rx_hdr_status; ++} ++ ++static u8 * ++ath10k_rx_desc_wcn3990_get_msdu_payload(struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v2 *rx_desc = container_of(rxd, struct htt_rx_desc_v2, base); 
++ ++ return rx_desc->msdu_payload; ++} ++ ++const struct ath10k_htt_rx_desc_ops wcn3990_rx_desc_ops = { ++ .rx_desc_size = sizeof(struct htt_rx_desc_v2), ++ .rx_desc_msdu_payload_offset = offsetof(struct htt_rx_desc_v2, msdu_payload), ++ ++ .rx_desc_from_raw_buffer = ath10k_rx_desc_wcn3990_from_raw_buffer, ++ .rx_desc_get_offsets = ath10k_rx_desc_wcn3990_get_offsets, ++ .rx_desc_get_attention = ath10k_rx_desc_wcn3990_get_attention, ++ .rx_desc_get_frag_info = ath10k_rx_desc_wcn3990_get_frag_info, ++ .rx_desc_get_mpdu_start = ath10k_rx_desc_wcn3990_get_mpdu_start, ++ .rx_desc_get_mpdu_end = ath10k_rx_desc_wcn3990_get_mpdu_end, ++ .rx_desc_get_msdu_start = ath10k_rx_desc_wcn3990_get_msdu_start, ++ .rx_desc_get_msdu_end = ath10k_rx_desc_wcn3990_get_msdu_end, ++ .rx_desc_get_ppdu_start = ath10k_rx_desc_wcn3990_get_ppdu_start, ++ .rx_desc_get_ppdu_end = ath10k_rx_desc_wcn3990_get_ppdu_end, ++ .rx_desc_get_rx_hdr_status = ath10k_rx_desc_wcn3990_get_rx_hdr_status, ++ .rx_desc_get_msdu_payload = ath10k_rx_desc_wcn3990_get_msdu_payload, ++}; ++ + int ath10k_htt_connect(struct ath10k_htt *htt) + { + struct ath10k_htc_svc_conn_req conn_req; +diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h +index 9a3a8907389b..f06cf39204e2 100644 +--- a/drivers/net/wireless/ath/ath10k/htt.h ++++ b/drivers/net/wireless/ath/ath10k/htt.h +@@ -240,14 +240,7 @@ enum htt_rx_ring_flags { + #define HTT_RX_RING_FILL_LEVEL (((HTT_RX_RING_SIZE) / 2) - 1) + #define HTT_RX_RING_FILL_LEVEL_DUAL_MAC (HTT_RX_RING_SIZE - 1) + +-struct htt_rx_ring_setup_ring32 { +- __le32 fw_idx_shadow_reg_paddr; +- __le32 rx_ring_base_paddr; +- __le16 rx_ring_len; /* in 4-byte words */ +- __le16 rx_ring_bufsize; /* rx skb size - in bytes */ +- __le16 flags; /* %HTT_RX_RING_FLAGS_ */ +- __le16 fw_idx_init_val; +- ++struct htt_rx_ring_rx_desc_offsets { + /* the following offsets are in 4-byte units */ + __le16 mac80211_hdr_offset; + __le16 msdu_payload_offset; +@@ -261,6 +254,17 @@ struct htt_rx_ring_setup_ring32 { + __le16 frag_info_offset; + } __packed; + ++struct htt_rx_ring_setup_ring32 { ++ __le32 fw_idx_shadow_reg_paddr; ++ __le32 rx_ring_base_paddr; ++ __le16 rx_ring_len; /* in 4-byte words */ ++ __le16 rx_ring_bufsize; /* rx skb size - in bytes */ ++ __le16 flags; /* %HTT_RX_RING_FLAGS_ */ ++ __le16 fw_idx_init_val; ++ ++ struct htt_rx_ring_rx_desc_offsets offsets; ++} __packed; ++ + struct htt_rx_ring_setup_ring64 { + __le64 fw_idx_shadow_reg_paddr; + __le64 rx_ring_base_paddr; +@@ -269,17 +273,7 @@ struct htt_rx_ring_setup_ring64 { + __le16 flags; /* %HTT_RX_RING_FLAGS_ */ + __le16 fw_idx_init_val; + +- /* the following offsets are in 4-byte units */ +- __le16 mac80211_hdr_offset; +- __le16 msdu_payload_offset; +- __le16 ppdu_start_offset; +- __le16 ppdu_end_offset; +- __le16 mpdu_start_offset; +- __le16 mpdu_end_offset; +- __le16 msdu_start_offset; +- __le16 msdu_end_offset; +- __le16 rx_attention_offset; +- __le16 frag_info_offset; ++ struct htt_rx_ring_rx_desc_offsets offsets; + } __packed; + + struct htt_rx_ring_setup_hdr { +@@ -2075,12 +2069,22 @@ static inline bool ath10k_htt_rx_proc_rx_frag_ind(struct ath10k_htt *htt, + return htt->rx_ops->htt_rx_proc_rx_frag_ind(htt, rx, skb); + } + ++/* the driver strongly assumes that the rx header status be 64 bytes long, ++ * so all possible rx_desc structures must respect this assumption. 
++ */ + #define RX_HTT_HDR_STATUS_LEN 64 + +-/* This structure layout is programmed via rx ring setup ++/* The rx descriptor structure layout is programmed via rx ring setup + * so that FW knows how to transfer the rx descriptor to the host. +- * Buffers like this are placed on the rx ring. ++ * Unfortunately, though, QCA6174's firmware doesn't currently behave correctly ++ * when modifying the structure layout of the rx descriptor beyond what it expects ++ * (even if it correctly programmed during the rx ring setup). ++ * Therefore we must keep two different memory layouts, abstract the rx descriptor ++ * representation and use ath10k_rx_desc_ops ++ * for correctly accessing rx descriptor data. + */ ++ ++/* base struct used for abstracting the rx descritor representation */ + struct htt_rx_desc { + union { + /* This field is filled on the host using the msdu buffer +@@ -2089,6 +2093,13 @@ struct htt_rx_desc { + struct fw_rx_desc_base fw_desc; + u32 pad; + } __packed; ++} __packed; ++ ++/* rx descriptor for wcn3990 and possibly extensible for newer cards ++ * Buffers like this are placed on the rx ring. ++ */ ++struct htt_rx_desc_v2 { ++ struct htt_rx_desc base; + struct { + struct rx_attention attention; + struct rx_frag_info frag_info; +@@ -2103,6 +2114,240 @@ struct htt_rx_desc { + u8 msdu_payload[]; + }; + ++/* QCA6174, QCA988x, QCA99x0 dedicated rx descriptor to make sure their firmware ++ * works correctly. We keep a single rx descriptor for all these three ++ * families of cards because from tests it seems to be the most stable solution, ++ * e.g. having a rx descriptor only for QCA6174 seldom caused firmware crashes ++ * during some tests. ++ * Buffers like this are placed on the rx ring. ++ */ ++struct htt_rx_desc_v1 { ++ struct htt_rx_desc base; ++ struct { ++ struct rx_attention attention; ++ struct rx_frag_info_v1 frag_info; ++ struct rx_mpdu_start mpdu_start; ++ struct rx_msdu_start_v1 msdu_start; ++ struct rx_msdu_end_v1 msdu_end; ++ struct rx_mpdu_end mpdu_end; ++ struct rx_ppdu_start ppdu_start; ++ struct rx_ppdu_end_v1 ppdu_end; ++ } __packed; ++ u8 rx_hdr_status[RX_HTT_HDR_STATUS_LEN]; ++ u8 msdu_payload[]; ++}; ++ ++/* rx_desc abstraction */ ++struct ath10k_htt_rx_desc_ops { ++ /* These fields are mandatory, they must be specified in any instance */ ++ ++ /* sizeof() of the rx_desc structure used by this hw */ ++ size_t rx_desc_size; ++ ++ /* offset of msdu_payload inside the rx_desc structure used by this hw */ ++ size_t rx_desc_msdu_payload_offset; ++ ++ /* These fields are options. 
++ * When a field is not provided the default implementation gets used ++ * (see the ath10k_rx_desc_* operations below for more info about the defaults) ++ */ ++ bool (*rx_desc_get_msdu_limit_error)(struct htt_rx_desc *rxd); ++ int (*rx_desc_get_l3_pad_bytes)(struct htt_rx_desc *rxd); ++ ++ /* Safely cast from a void* buffer containing an rx descriptor ++ * to the proper rx_desc structure ++ */ ++ struct htt_rx_desc *(*rx_desc_from_raw_buffer)(void *buff); ++ ++ void (*rx_desc_get_offsets)(struct htt_rx_ring_rx_desc_offsets *offs); ++ struct rx_attention *(*rx_desc_get_attention)(struct htt_rx_desc *rxd); ++ struct rx_frag_info_common *(*rx_desc_get_frag_info)(struct htt_rx_desc *rxd); ++ struct rx_mpdu_start *(*rx_desc_get_mpdu_start)(struct htt_rx_desc *rxd); ++ struct rx_mpdu_end *(*rx_desc_get_mpdu_end)(struct htt_rx_desc *rxd); ++ struct rx_msdu_start_common *(*rx_desc_get_msdu_start)(struct htt_rx_desc *rxd); ++ struct rx_msdu_end_common *(*rx_desc_get_msdu_end)(struct htt_rx_desc *rxd); ++ struct rx_ppdu_start *(*rx_desc_get_ppdu_start)(struct htt_rx_desc *rxd); ++ struct rx_ppdu_end_common *(*rx_desc_get_ppdu_end)(struct htt_rx_desc *rxd); ++ u8 *(*rx_desc_get_rx_hdr_status)(struct htt_rx_desc *rxd); ++ u8 *(*rx_desc_get_msdu_payload)(struct htt_rx_desc *rxd); ++}; ++ ++extern const struct ath10k_htt_rx_desc_ops qca988x_rx_desc_ops; ++extern const struct ath10k_htt_rx_desc_ops qca99x0_rx_desc_ops; ++extern const struct ath10k_htt_rx_desc_ops wcn3990_rx_desc_ops; ++ ++static inline int ++ath10k_htt_rx_desc_get_l3_pad_bytes(struct ath10k_hw_params *hw, struct htt_rx_desc *rxd) ++{ ++ if (hw->rx_desc_ops->rx_desc_get_l3_pad_bytes) ++ return hw->rx_desc_ops->rx_desc_get_l3_pad_bytes(rxd); ++ return 0; ++} ++ ++static inline bool ++ath10k_htt_rx_desc_msdu_limit_error(struct ath10k_hw_params *hw, struct htt_rx_desc *rxd) ++{ ++ if (hw->rx_desc_ops->rx_desc_get_msdu_limit_error) ++ return hw->rx_desc_ops->rx_desc_get_msdu_limit_error(rxd); ++ return false; ++} ++ ++/* The default implementation of all these getters is using the old rx_desc, ++ * so that it is easier to define the ath10k_htt_rx_desc_ops instances. ++ * But probably, if new wireless cards must be supported, it would be better ++ * to switch the default implementation to the new rx_desc, since this would ++ * make the extension easier . 
++ */ ++static inline struct htt_rx_desc * ++ath10k_htt_rx_desc_from_raw_buffer(struct ath10k_hw_params *hw, void *buff) ++{ ++ if (hw->rx_desc_ops->rx_desc_from_raw_buffer) ++ return hw->rx_desc_ops->rx_desc_from_raw_buffer(buff); ++ return &((struct htt_rx_desc_v1 *)buff)->base; ++} ++ ++static inline void ++ath10k_htt_rx_desc_get_offsets(struct ath10k_hw_params *hw, ++ struct htt_rx_ring_rx_desc_offsets *off) ++{ ++ if (hw->rx_desc_ops->rx_desc_get_offsets) { ++ hw->rx_desc_ops->rx_desc_get_offsets(off); ++ } else { ++#define desc_offset(x) (offsetof(struct htt_rx_desc_v1, x) / 4) ++ off->mac80211_hdr_offset = __cpu_to_le16(desc_offset(rx_hdr_status)); ++ off->msdu_payload_offset = __cpu_to_le16(desc_offset(msdu_payload)); ++ off->ppdu_start_offset = __cpu_to_le16(desc_offset(ppdu_start)); ++ off->ppdu_end_offset = __cpu_to_le16(desc_offset(ppdu_end)); ++ off->mpdu_start_offset = __cpu_to_le16(desc_offset(mpdu_start)); ++ off->mpdu_end_offset = __cpu_to_le16(desc_offset(mpdu_end)); ++ off->msdu_start_offset = __cpu_to_le16(desc_offset(msdu_start)); ++ off->msdu_end_offset = __cpu_to_le16(desc_offset(msdu_end)); ++ off->rx_attention_offset = __cpu_to_le16(desc_offset(attention)); ++ off->frag_info_offset = __cpu_to_le16(desc_offset(frag_info)); ++#undef desc_offset ++ } ++} ++ ++static inline struct rx_attention * ++ath10k_htt_rx_desc_get_attention(struct ath10k_hw_params *hw, struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v1 *rx_desc; ++ ++ if (hw->rx_desc_ops->rx_desc_get_attention) ++ return hw->rx_desc_ops->rx_desc_get_attention(rxd); ++ ++ rx_desc = container_of(rxd, struct htt_rx_desc_v1, base); ++ return &rx_desc->attention; ++} ++ ++static inline struct rx_frag_info_common * ++ath10k_htt_rx_desc_get_frag_info(struct ath10k_hw_params *hw, struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v1 *rx_desc; ++ ++ if (hw->rx_desc_ops->rx_desc_get_frag_info) ++ return hw->rx_desc_ops->rx_desc_get_frag_info(rxd); ++ ++ rx_desc = container_of(rxd, struct htt_rx_desc_v1, base); ++ return &rx_desc->frag_info.common; ++} ++ ++static inline struct rx_mpdu_start * ++ath10k_htt_rx_desc_get_mpdu_start(struct ath10k_hw_params *hw, struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v1 *rx_desc; ++ ++ if (hw->rx_desc_ops->rx_desc_get_mpdu_start) ++ return hw->rx_desc_ops->rx_desc_get_mpdu_start(rxd); ++ ++ rx_desc = container_of(rxd, struct htt_rx_desc_v1, base); ++ return &rx_desc->mpdu_start; ++} ++ ++static inline struct rx_mpdu_end * ++ath10k_htt_rx_desc_get_mpdu_end(struct ath10k_hw_params *hw, struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v1 *rx_desc; ++ ++ if (hw->rx_desc_ops->rx_desc_get_mpdu_end) ++ return hw->rx_desc_ops->rx_desc_get_mpdu_end(rxd); ++ ++ rx_desc = container_of(rxd, struct htt_rx_desc_v1, base); ++ return &rx_desc->mpdu_end; ++} ++ ++static inline struct rx_msdu_start_common * ++ath10k_htt_rx_desc_get_msdu_start(struct ath10k_hw_params *hw, struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v1 *rx_desc; ++ ++ if (hw->rx_desc_ops->rx_desc_get_msdu_start) ++ return hw->rx_desc_ops->rx_desc_get_msdu_start(rxd); ++ ++ rx_desc = container_of(rxd, struct htt_rx_desc_v1, base); ++ return &rx_desc->msdu_start.common; ++} ++ ++static inline struct rx_msdu_end_common * ++ath10k_htt_rx_desc_get_msdu_end(struct ath10k_hw_params *hw, struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v1 *rx_desc; ++ ++ if (hw->rx_desc_ops->rx_desc_get_msdu_end) ++ return hw->rx_desc_ops->rx_desc_get_msdu_end(rxd); ++ ++ rx_desc = container_of(rxd, struct htt_rx_desc_v1, base); ++ return 
&rx_desc->msdu_end.common; ++} ++ ++static inline struct rx_ppdu_start * ++ath10k_htt_rx_desc_get_ppdu_start(struct ath10k_hw_params *hw, struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v1 *rx_desc; ++ ++ if (hw->rx_desc_ops->rx_desc_get_ppdu_start) ++ return hw->rx_desc_ops->rx_desc_get_ppdu_start(rxd); ++ ++ rx_desc = container_of(rxd, struct htt_rx_desc_v1, base); ++ return &rx_desc->ppdu_start; ++} ++ ++static inline struct rx_ppdu_end_common * ++ath10k_htt_rx_desc_get_ppdu_end(struct ath10k_hw_params *hw, struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v1 *rx_desc; ++ ++ if (hw->rx_desc_ops->rx_desc_get_ppdu_end) ++ return hw->rx_desc_ops->rx_desc_get_ppdu_end(rxd); ++ ++ rx_desc = container_of(rxd, struct htt_rx_desc_v1, base); ++ return &rx_desc->ppdu_end.common; ++} ++ ++static inline u8 * ++ath10k_htt_rx_desc_get_rx_hdr_status(struct ath10k_hw_params *hw, struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v1 *rx_desc; ++ ++ if (hw->rx_desc_ops->rx_desc_get_rx_hdr_status) ++ return hw->rx_desc_ops->rx_desc_get_rx_hdr_status(rxd); ++ ++ rx_desc = container_of(rxd, struct htt_rx_desc_v1, base); ++ return rx_desc->rx_hdr_status; ++} ++ ++static inline u8 * ++ath10k_htt_rx_desc_get_msdu_payload(struct ath10k_hw_params *hw, struct htt_rx_desc *rxd) ++{ ++ struct htt_rx_desc_v1 *rx_desc; ++ ++ if (hw->rx_desc_ops->rx_desc_get_msdu_payload) ++ return hw->rx_desc_ops->rx_desc_get_msdu_payload(rxd); ++ ++ rx_desc = container_of(rxd, struct htt_rx_desc_v1, base); ++ return rx_desc->msdu_payload; ++} ++ + #define HTT_RX_DESC_HL_INFO_SEQ_NUM_MASK 0x00000fff + #define HTT_RX_DESC_HL_INFO_SEQ_NUM_LSB 0 + #define HTT_RX_DESC_HL_INFO_ENCRYPTED_MASK 0x00001000 +@@ -2136,7 +2381,14 @@ struct htt_rx_chan_info { + * rounded up to a cache line size. + */ + #define HTT_RX_BUF_SIZE 2048 +-#define HTT_RX_MSDU_SIZE (HTT_RX_BUF_SIZE - (int)sizeof(struct htt_rx_desc)) ++ ++/* The HTT_RX_MSDU_SIZE can't be statically computed anymore, ++ * because it depends on the underlying device rx_desc representation ++ */ ++static inline int ath10k_htt_rx_msdu_size(struct ath10k_hw_params *hw) ++{ ++ return HTT_RX_BUF_SIZE - (int)hw->rx_desc_ops->rx_desc_size; ++} + + /* Refill a bunch of RX buffers for each refill round so that FW/HW can handle + * aggregated traffic more nicely. 
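+
+For illustration, a minimal sketch of how a variant wires up this
+abstraction (example_rx_desc_ops is a hypothetical name, not part of
+this patch; any getter left NULL falls back to the htt_rx_desc_v1
+defaults above, while rx_desc_size and rx_desc_msdu_payload_offset are
+read directly and must be set):
+
+	static const struct ath10k_htt_rx_desc_ops example_rx_desc_ops = {
+		.rx_desc_size = sizeof(struct htt_rx_desc_v1),
+		.rx_desc_msdu_payload_offset =
+			offsetof(struct htt_rx_desc_v1, msdu_payload),
+	};
+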
+diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c +index adbaeb67eedf..9ad64ca84beb 100644 +--- a/drivers/net/wireless/ath/ath10k/htt_rx.c ++++ b/drivers/net/wireless/ath/ath10k/htt_rx.c +@@ -21,7 +21,10 @@ + + #define HTT_RX_RING_REFILL_RESCHED_MS 5 + +-static int ath10k_htt_rx_get_csum_state(struct sk_buff *skb); ++/* shortcut to interpret a raw memory buffer as a rx descriptor */ ++#define HTT_RX_BUF_TO_RX_DESC(hw, buf) ath10k_htt_rx_desc_from_raw_buffer(hw, buf) ++ ++static int ath10k_htt_rx_get_csum_state(struct ath10k_hw_params *hw, struct sk_buff *skb); + + static struct sk_buff * + ath10k_htt_rx_find_skb_paddr(struct ath10k *ar, u64 paddr) +@@ -128,6 +131,7 @@ static void *ath10k_htt_get_vaddr_ring_64(struct ath10k_htt *htt) + + static int __ath10k_htt_rx_ring_fill_n(struct ath10k_htt *htt, int num) + { ++ struct ath10k_hw_params *hw = &htt->ar->hw_params; + struct htt_rx_desc *rx_desc; + struct ath10k_skb_rxcb *rxcb; + struct sk_buff *skb; +@@ -163,8 +167,8 @@ static int __ath10k_htt_rx_ring_fill_n(struct ath10k_htt *htt, int num) + skb->data); + + /* Clear rx_desc attention word before posting to Rx ring */ +- rx_desc = (struct htt_rx_desc *)skb->data; +- rx_desc->attention.flags = __cpu_to_le32(0); ++ rx_desc = HTT_RX_BUF_TO_RX_DESC(hw, skb->data); ++ ath10k_htt_rx_desc_get_attention(hw, rx_desc)->flags = __cpu_to_le32(0); + + paddr = dma_map_single(htt->ar->dev, skb->data, + skb->len + skb_tailroom(skb), +@@ -343,9 +347,14 @@ static int ath10k_htt_rx_amsdu_pop(struct ath10k_htt *htt, + struct sk_buff_head *amsdu) + { + struct ath10k *ar = htt->ar; ++ struct ath10k_hw_params *hw = &ar->hw_params; + int msdu_len, msdu_chaining = 0; + struct sk_buff *msdu; + struct htt_rx_desc *rx_desc; ++ struct rx_attention *rx_desc_attention; ++ struct rx_frag_info_common *rx_desc_frag_info_common; ++ struct rx_msdu_start_common *rx_desc_msdu_start_common; ++ struct rx_msdu_end_common *rx_desc_msdu_end_common; + + lockdep_assert_held(&htt->rx_ring.lock); + +@@ -360,13 +369,18 @@ static int ath10k_htt_rx_amsdu_pop(struct ath10k_htt *htt, + + __skb_queue_tail(amsdu, msdu); + +- rx_desc = (struct htt_rx_desc *)msdu->data; ++ rx_desc = HTT_RX_BUF_TO_RX_DESC(hw, msdu->data); ++ rx_desc_attention = ath10k_htt_rx_desc_get_attention(hw, rx_desc); ++ rx_desc_msdu_start_common = ath10k_htt_rx_desc_get_msdu_start(hw, ++ rx_desc); ++ rx_desc_msdu_end_common = ath10k_htt_rx_desc_get_msdu_end(hw, rx_desc); ++ rx_desc_frag_info_common = ath10k_htt_rx_desc_get_frag_info(hw, rx_desc); + + /* FIXME: we must report msdu payload since this is what caller + * expects now + */ +- skb_put(msdu, offsetof(struct htt_rx_desc, msdu_payload)); +- skb_pull(msdu, offsetof(struct htt_rx_desc, msdu_payload)); ++ skb_put(msdu, hw->rx_desc_ops->rx_desc_msdu_payload_offset); ++ skb_pull(msdu, hw->rx_desc_ops->rx_desc_msdu_payload_offset); + + /* + * Sanity check - confirm the HW is finished filling in the +@@ -376,24 +390,24 @@ static int ath10k_htt_rx_amsdu_pop(struct ath10k_htt *htt, + * To prevent the case that we handle a stale Rx descriptor, + * just assert for now until we have a way to recover. 
+ */ +- if (!(__le32_to_cpu(rx_desc->attention.flags) ++ if (!(__le32_to_cpu(rx_desc_attention->flags) + & RX_ATTENTION_FLAGS_MSDU_DONE)) { + __skb_queue_purge(amsdu); + return -EIO; + } + +- msdu_len_invalid = !!(__le32_to_cpu(rx_desc->attention.flags) ++ msdu_len_invalid = !!(__le32_to_cpu(rx_desc_attention->flags) + & (RX_ATTENTION_FLAGS_MPDU_LENGTH_ERR | + RX_ATTENTION_FLAGS_MSDU_LENGTH_ERR)); +- msdu_len = MS(__le32_to_cpu(rx_desc->msdu_start.common.info0), ++ msdu_len = MS(__le32_to_cpu(rx_desc_msdu_start_common->info0), + RX_MSDU_START_INFO0_MSDU_LENGTH); +- msdu_chained = rx_desc->frag_info.ring2_more_count; ++ msdu_chained = rx_desc_frag_info_common->ring2_more_count; + + if (msdu_len_invalid) + msdu_len = 0; + + skb_trim(msdu, 0); +- skb_put(msdu, min(msdu_len, HTT_RX_MSDU_SIZE)); ++ skb_put(msdu, min(msdu_len, ath10k_htt_rx_msdu_size(hw))); + msdu_len -= msdu->len; + + /* Note: Chained buffers do not contain rx descriptor */ +@@ -411,11 +425,12 @@ static int ath10k_htt_rx_amsdu_pop(struct ath10k_htt *htt, + msdu_chaining = 1; + } + +- last_msdu = __le32_to_cpu(rx_desc->msdu_end.common.info0) & ++ last_msdu = __le32_to_cpu(rx_desc_msdu_end_common->info0) & + RX_MSDU_END_INFO0_LAST_MSDU; + +- trace_ath10k_htt_rx_desc(ar, &rx_desc->attention, +- sizeof(*rx_desc) - sizeof(u32)); ++ /* FIXME: why are we skipping the first part of the rx_desc? */ ++ trace_ath10k_htt_rx_desc(ar, rx_desc + sizeof(u32), ++ hw->rx_desc_ops->rx_desc_size - sizeof(u32)); + + if (last_msdu) + break; +@@ -480,6 +495,7 @@ static int ath10k_htt_rx_handle_amsdu_mon_32(struct ath10k_htt *htt, + struct htt_rx_in_ord_msdu_desc **msdu_desc) + { + struct ath10k *ar = htt->ar; ++ struct ath10k_hw_params *hw = &ar->hw_params; + u32 paddr; + struct sk_buff *frag_buf; + struct sk_buff *prev_frag_buf; +@@ -488,12 +504,12 @@ static int ath10k_htt_rx_handle_amsdu_mon_32(struct ath10k_htt *htt, + struct htt_rx_desc *rxd; + int amsdu_len = __le16_to_cpu(ind_desc->msdu_len); + +- rxd = (void *)msdu->data; +- trace_ath10k_htt_rx_desc(ar, rxd, sizeof(*rxd)); ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, msdu->data); ++ trace_ath10k_htt_rx_desc(ar, rxd, hw->rx_desc_ops->rx_desc_size); + +- skb_put(msdu, sizeof(struct htt_rx_desc)); +- skb_pull(msdu, sizeof(struct htt_rx_desc)); +- skb_put(msdu, min(amsdu_len, HTT_RX_MSDU_SIZE)); ++ skb_put(msdu, hw->rx_desc_ops->rx_desc_size); ++ skb_pull(msdu, hw->rx_desc_ops->rx_desc_size); ++ skb_put(msdu, min(amsdu_len, ath10k_htt_rx_msdu_size(hw))); + amsdu_len -= msdu->len; + + last_frag = ind_desc->reserved; +@@ -556,6 +572,7 @@ ath10k_htt_rx_handle_amsdu_mon_64(struct ath10k_htt *htt, + struct htt_rx_in_ord_msdu_desc_ext **msdu_desc) + { + struct ath10k *ar = htt->ar; ++ struct ath10k_hw_params *hw = &ar->hw_params; + u64 paddr; + struct sk_buff *frag_buf; + struct sk_buff *prev_frag_buf; +@@ -564,12 +581,12 @@ ath10k_htt_rx_handle_amsdu_mon_64(struct ath10k_htt *htt, + struct htt_rx_desc *rxd; + int amsdu_len = __le16_to_cpu(ind_desc->msdu_len); + +- rxd = (void *)msdu->data; +- trace_ath10k_htt_rx_desc(ar, rxd, sizeof(*rxd)); ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, msdu->data); ++ trace_ath10k_htt_rx_desc(ar, rxd, hw->rx_desc_ops->rx_desc_size); + +- skb_put(msdu, sizeof(struct htt_rx_desc)); +- skb_pull(msdu, sizeof(struct htt_rx_desc)); +- skb_put(msdu, min(amsdu_len, HTT_RX_MSDU_SIZE)); ++ skb_put(msdu, hw->rx_desc_ops->rx_desc_size); ++ skb_pull(msdu, hw->rx_desc_ops->rx_desc_size); ++ skb_put(msdu, min(amsdu_len, ath10k_htt_rx_msdu_size(hw))); + amsdu_len -= msdu->len; + + last_frag = 
ind_desc->reserved; +@@ -631,8 +648,10 @@ static int ath10k_htt_rx_pop_paddr32_list(struct ath10k_htt *htt, + struct sk_buff_head *list) + { + struct ath10k *ar = htt->ar; ++ struct ath10k_hw_params *hw = &ar->hw_params; + struct htt_rx_in_ord_msdu_desc *msdu_desc = ev->msdu_descs32; + struct htt_rx_desc *rxd; ++ struct rx_attention *rxd_attention; + struct sk_buff *msdu; + int msdu_count, ret; + bool is_offload; +@@ -667,15 +686,16 @@ static int ath10k_htt_rx_pop_paddr32_list(struct ath10k_htt *htt, + __skb_queue_tail(list, msdu); + + if (!is_offload) { +- rxd = (void *)msdu->data; ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, msdu->data); ++ rxd_attention = ath10k_htt_rx_desc_get_attention(hw, rxd); + +- trace_ath10k_htt_rx_desc(ar, rxd, sizeof(*rxd)); ++ trace_ath10k_htt_rx_desc(ar, rxd, hw->rx_desc_ops->rx_desc_size); + +- skb_put(msdu, sizeof(*rxd)); +- skb_pull(msdu, sizeof(*rxd)); ++ skb_put(msdu, hw->rx_desc_ops->rx_desc_size); ++ skb_pull(msdu, hw->rx_desc_ops->rx_desc_size); + skb_put(msdu, __le16_to_cpu(msdu_desc->msdu_len)); + +- if (!(__le32_to_cpu(rxd->attention.flags) & ++ if (!(__le32_to_cpu(rxd_attention->flags) & + RX_ATTENTION_FLAGS_MSDU_DONE)) { + ath10k_warn(htt->ar, "tried to pop an incomplete frame, oops!\n"); + return -EIO; +@@ -693,8 +713,10 @@ static int ath10k_htt_rx_pop_paddr64_list(struct ath10k_htt *htt, + struct sk_buff_head *list) + { + struct ath10k *ar = htt->ar; ++ struct ath10k_hw_params *hw = &ar->hw_params; + struct htt_rx_in_ord_msdu_desc_ext *msdu_desc = ev->msdu_descs64; + struct htt_rx_desc *rxd; ++ struct rx_attention *rxd_attention; + struct sk_buff *msdu; + int msdu_count, ret; + bool is_offload; +@@ -728,15 +750,16 @@ static int ath10k_htt_rx_pop_paddr64_list(struct ath10k_htt *htt, + __skb_queue_tail(list, msdu); + + if (!is_offload) { +- rxd = (void *)msdu->data; ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, msdu->data); ++ rxd_attention = ath10k_htt_rx_desc_get_attention(hw, rxd); + +- trace_ath10k_htt_rx_desc(ar, rxd, sizeof(*rxd)); ++ trace_ath10k_htt_rx_desc(ar, rxd, hw->rx_desc_ops->rx_desc_size); + +- skb_put(msdu, sizeof(*rxd)); +- skb_pull(msdu, sizeof(*rxd)); ++ skb_put(msdu, hw->rx_desc_ops->rx_desc_size); ++ skb_pull(msdu, hw->rx_desc_ops->rx_desc_size); + skb_put(msdu, __le16_to_cpu(msdu_desc->msdu_len)); + +- if (!(__le32_to_cpu(rxd->attention.flags) & ++ if (!(__le32_to_cpu(rxd_attention->flags) & + RX_ATTENTION_FLAGS_MSDU_DONE)) { + ath10k_warn(htt->ar, "tried to pop an incomplete frame, oops!\n"); + return -EIO; +@@ -944,16 +967,32 @@ static void ath10k_htt_rx_h_rates(struct ath10k *ar, + struct ieee80211_rx_status *status, + struct htt_rx_desc *rxd) + { ++ struct ath10k_hw_params *hw = &ar->hw_params; ++ struct rx_attention *rxd_attention; ++ struct rx_mpdu_start *rxd_mpdu_start; ++ struct rx_mpdu_end *rxd_mpdu_end; ++ struct rx_msdu_start_common *rxd_msdu_start_common; ++ struct rx_msdu_end_common *rxd_msdu_end_common; ++ struct rx_ppdu_start *rxd_ppdu_start; + struct ieee80211_supported_band *sband; + u8 cck, rate, bw, sgi, mcs, nss; ++ u8 *rxd_msdu_payload; + u8 preamble = 0; + u8 group_id; + u32 info1, info2, info3; + u32 stbc, nsts_su; + +- info1 = __le32_to_cpu(rxd->ppdu_start.info1); +- info2 = __le32_to_cpu(rxd->ppdu_start.info2); +- info3 = __le32_to_cpu(rxd->ppdu_start.info3); ++ rxd_attention = ath10k_htt_rx_desc_get_attention(hw, rxd); ++ rxd_mpdu_start = ath10k_htt_rx_desc_get_mpdu_start(hw, rxd); ++ rxd_mpdu_end = ath10k_htt_rx_desc_get_mpdu_end(hw, rxd); ++ rxd_msdu_start_common = ath10k_htt_rx_desc_get_msdu_start(hw, rxd); ++ 
rxd_msdu_end_common = ath10k_htt_rx_desc_get_msdu_end(hw, rxd); ++ rxd_ppdu_start = ath10k_htt_rx_desc_get_ppdu_start(hw, rxd); ++ rxd_msdu_payload = ath10k_htt_rx_desc_get_msdu_payload(hw, rxd); ++ ++ info1 = __le32_to_cpu(rxd_ppdu_start->info1); ++ info2 = __le32_to_cpu(rxd_ppdu_start->info2); ++ info3 = __le32_to_cpu(rxd_ppdu_start->info3); + + preamble = MS(info1, RX_PPDU_START_INFO1_PREAMBLE_TYPE); + +@@ -1022,24 +1061,24 @@ static void ath10k_htt_rx_h_rates(struct ath10k *ar, + if (mcs > 0x09) { + ath10k_warn(ar, "invalid MCS received %u\n", mcs); + ath10k_warn(ar, "rxd %08x mpdu start %08x %08x msdu start %08x %08x ppdu start %08x %08x %08x %08x %08x\n", +- __le32_to_cpu(rxd->attention.flags), +- __le32_to_cpu(rxd->mpdu_start.info0), +- __le32_to_cpu(rxd->mpdu_start.info1), +- __le32_to_cpu(rxd->msdu_start.common.info0), +- __le32_to_cpu(rxd->msdu_start.common.info1), +- rxd->ppdu_start.info0, +- __le32_to_cpu(rxd->ppdu_start.info1), +- __le32_to_cpu(rxd->ppdu_start.info2), +- __le32_to_cpu(rxd->ppdu_start.info3), +- __le32_to_cpu(rxd->ppdu_start.info4)); ++ __le32_to_cpu(rxd_attention->flags), ++ __le32_to_cpu(rxd_mpdu_start->info0), ++ __le32_to_cpu(rxd_mpdu_start->info1), ++ __le32_to_cpu(rxd_msdu_start_common->info0), ++ __le32_to_cpu(rxd_msdu_start_common->info1), ++ rxd_ppdu_start->info0, ++ __le32_to_cpu(rxd_ppdu_start->info1), ++ __le32_to_cpu(rxd_ppdu_start->info2), ++ __le32_to_cpu(rxd_ppdu_start->info3), ++ __le32_to_cpu(rxd_ppdu_start->info4)); + + ath10k_warn(ar, "msdu end %08x mpdu end %08x\n", +- __le32_to_cpu(rxd->msdu_end.common.info0), +- __le32_to_cpu(rxd->mpdu_end.info0)); ++ __le32_to_cpu(rxd_msdu_end_common->info0), ++ __le32_to_cpu(rxd_mpdu_end->info0)); + + ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, + "rx desc msdu payload: ", +- rxd->msdu_payload, 50); ++ rxd_msdu_payload, 50); + } + + status->rate_idx = mcs; +@@ -1059,6 +1098,10 @@ static void ath10k_htt_rx_h_rates(struct ath10k *ar, + static struct ieee80211_channel * + ath10k_htt_rx_h_peer_channel(struct ath10k *ar, struct htt_rx_desc *rxd) + { ++ struct ath10k_hw_params *hw = &ar->hw_params; ++ struct rx_attention *rxd_attention; ++ struct rx_msdu_end_common *rxd_msdu_end_common; ++ struct rx_mpdu_start *rxd_mpdu_start; + struct ath10k_peer *peer; + struct ath10k_vif *arvif; + struct cfg80211_chan_def def; +@@ -1069,15 +1112,19 @@ ath10k_htt_rx_h_peer_channel(struct ath10k *ar, struct htt_rx_desc *rxd) + if (!rxd) + return NULL; + +- if (rxd->attention.flags & ++ rxd_attention = ath10k_htt_rx_desc_get_attention(hw, rxd); ++ rxd_msdu_end_common = ath10k_htt_rx_desc_get_msdu_end(hw, rxd); ++ rxd_mpdu_start = ath10k_htt_rx_desc_get_mpdu_start(hw, rxd); ++ ++ if (rxd_attention->flags & + __cpu_to_le32(RX_ATTENTION_FLAGS_PEER_IDX_INVALID)) + return NULL; + +- if (!(rxd->msdu_end.common.info0 & ++ if (!(rxd_msdu_end_common->info0 & + __cpu_to_le32(RX_MSDU_END_INFO0_FIRST_MSDU))) + return NULL; + +- peer_id = MS(__le32_to_cpu(rxd->mpdu_start.info0), ++ peer_id = MS(__le32_to_cpu(rxd_mpdu_start->info0), + RX_MPDU_START_INFO0_PEER_IDX); + + peer = ath10k_peer_find_by_id(ar, peer_id); +@@ -1167,14 +1214,16 @@ static void ath10k_htt_rx_h_signal(struct ath10k *ar, + struct ieee80211_rx_status *status, + struct htt_rx_desc *rxd) + { ++ struct ath10k_hw_params *hw = &ar->hw_params; ++ struct rx_ppdu_start *rxd_ppdu_start = ath10k_htt_rx_desc_get_ppdu_start(hw, rxd); + int i; + + for (i = 0; i < IEEE80211_MAX_CHAINS ; i++) { + status->chains &= ~BIT(i); + +- if (rxd->ppdu_start.rssi_chains[i].pri20_mhz != 0x80) { 
++ if (rxd_ppdu_start->rssi_chains[i].pri20_mhz != 0x80) { + status->chain_signal[i] = ATH10K_DEFAULT_NOISE_FLOOR + +- rxd->ppdu_start.rssi_chains[i].pri20_mhz; ++ rxd_ppdu_start->rssi_chains[i].pri20_mhz; + + status->chains |= BIT(i); + } +@@ -1182,7 +1231,7 @@ static void ath10k_htt_rx_h_signal(struct ath10k *ar, + + /* FIXME: Get real NF */ + status->signal = ATH10K_DEFAULT_NOISE_FLOOR + +- rxd->ppdu_start.rssi_comb; ++ rxd_ppdu_start->rssi_comb; + status->flag &= ~RX_FLAG_NO_SIGNAL_VAL; + } + +@@ -1190,13 +1239,18 @@ static void ath10k_htt_rx_h_mactime(struct ath10k *ar, + struct ieee80211_rx_status *status, + struct htt_rx_desc *rxd) + { ++ struct ath10k_hw_params *hw = &ar->hw_params; ++ struct rx_ppdu_end_common *rxd_ppdu_end_common; ++ ++ rxd_ppdu_end_common = ath10k_htt_rx_desc_get_ppdu_end(hw, rxd); ++ + /* FIXME: TSF is known only at the end of PPDU, in the last MPDU. This + * means all prior MSDUs in a PPDU are reported to mac80211 without the + * TSF. Is it worth holding frames until end of PPDU is known? + * + * FIXME: Can we get/compute 64bit TSF? + */ +- status->mactime = __le32_to_cpu(rxd->ppdu_end.common.tsf_timestamp); ++ status->mactime = __le32_to_cpu(rxd_ppdu_end_common->tsf_timestamp); + status->flag |= RX_FLAG_MACTIME_END; + } + +@@ -1206,7 +1260,9 @@ static void ath10k_htt_rx_h_ppdu(struct ath10k *ar, + u32 vdev_id) + { + struct sk_buff *first; ++ struct ath10k_hw_params *hw = &ar->hw_params; + struct htt_rx_desc *rxd; ++ struct rx_attention *rxd_attention; + bool is_first_ppdu; + bool is_last_ppdu; + +@@ -1214,11 +1270,14 @@ static void ath10k_htt_rx_h_ppdu(struct ath10k *ar, + return; + + first = skb_peek(amsdu); +- rxd = (void *)first->data - sizeof(*rxd); ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, ++ (void *)first->data - hw->rx_desc_ops->rx_desc_size); + +- is_first_ppdu = !!(rxd->attention.flags & ++ rxd_attention = ath10k_htt_rx_desc_get_attention(hw, rxd); ++ ++ is_first_ppdu = !!(rxd_attention->flags & + __cpu_to_le32(RX_ATTENTION_FLAGS_FIRST_MPDU)); +- is_last_ppdu = !!(rxd->attention.flags & ++ is_last_ppdu = !!(rxd_attention->flags & + __cpu_to_le32(RX_ATTENTION_FLAGS_LAST_MPDU)); + + if (is_first_ppdu) { +@@ -1357,7 +1416,9 @@ static void ath10k_htt_rx_h_undecap_raw(struct ath10k *ar, + const u8 first_hdr[64]) + { + struct ieee80211_hdr *hdr; ++ struct ath10k_hw_params *hw = &ar->hw_params; + struct htt_rx_desc *rxd; ++ struct rx_msdu_end_common *rxd_msdu_end_common; + size_t hdr_len; + size_t crypto_len; + bool is_first; +@@ -1366,10 +1427,13 @@ static void ath10k_htt_rx_h_undecap_raw(struct ath10k *ar, + int bytes_aligned = ar->hw_params.decap_align_bytes; + u8 *qos; + +- rxd = (void *)msdu->data - sizeof(*rxd); +- is_first = !!(rxd->msdu_end.common.info0 & ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, ++ (void *)msdu->data - hw->rx_desc_ops->rx_desc_size); ++ ++ rxd_msdu_end_common = ath10k_htt_rx_desc_get_msdu_end(hw, rxd); ++ is_first = !!(rxd_msdu_end_common->info0 & + __cpu_to_le32(RX_MSDU_END_INFO0_FIRST_MSDU)); +- is_last = !!(rxd->msdu_end.common.info0 & ++ is_last = !!(rxd_msdu_end_common->info0 & + __cpu_to_le32(RX_MSDU_END_INFO0_LAST_MSDU)); + + /* Delivered decapped frame: +@@ -1387,7 +1451,7 @@ static void ath10k_htt_rx_h_undecap_raw(struct ath10k *ar, + * error packets. If limit exceeds, hw sends all remaining MSDUs as + * a single last MSDU with this msdu limit error set. 
+ */ +- msdu_limit_err = ath10k_rx_desc_msdu_limit_error(&ar->hw_params, rxd); ++ msdu_limit_err = ath10k_htt_rx_desc_msdu_limit_error(hw, rxd); + + /* If MSDU limit error happens, then don't warn on, the partial raw MSDU + * without first MSDU is expected in that case, and handled later here. +@@ -1479,6 +1543,7 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, + const u8 first_hdr[64], + enum htt_rx_mpdu_encrypt_type enctype) + { ++ struct ath10k_hw_params *hw = &ar->hw_params; + struct ieee80211_hdr *hdr; + struct htt_rx_desc *rxd; + size_t hdr_len; +@@ -1499,9 +1564,10 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, + */ + + /* pull decapped header and copy SA & DA */ +- rxd = (void *)msdu->data - sizeof(*rxd); ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, (void *)msdu->data - ++ hw->rx_desc_ops->rx_desc_size); + +- l3_pad_bytes = ath10k_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd); ++ l3_pad_bytes = ath10k_htt_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd); + skb_put(msdu, l3_pad_bytes); + + hdr = (struct ieee80211_hdr *)(msdu->data + l3_pad_bytes); +@@ -1537,18 +1603,25 @@ static void *ath10k_htt_rx_h_find_rfc1042(struct ath10k *ar, + enum htt_rx_mpdu_encrypt_type enctype) + { + struct ieee80211_hdr *hdr; ++ struct ath10k_hw_params *hw = &ar->hw_params; + struct htt_rx_desc *rxd; ++ struct rx_msdu_end_common *rxd_msdu_end_common; ++ u8 *rxd_rx_hdr_status; + size_t hdr_len, crypto_len; + void *rfc1042; + bool is_first, is_last, is_amsdu; + int bytes_aligned = ar->hw_params.decap_align_bytes; + +- rxd = (void *)msdu->data - sizeof(*rxd); +- hdr = (void *)rxd->rx_hdr_status; ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, ++ (void *)msdu->data - hw->rx_desc_ops->rx_desc_size); ++ ++ rxd_msdu_end_common = ath10k_htt_rx_desc_get_msdu_end(hw, rxd); ++ rxd_rx_hdr_status = ath10k_htt_rx_desc_get_rx_hdr_status(hw, rxd); ++ hdr = (void *)rxd_rx_hdr_status; + +- is_first = !!(rxd->msdu_end.common.info0 & ++ is_first = !!(rxd_msdu_end_common->info0 & + __cpu_to_le32(RX_MSDU_END_INFO0_FIRST_MSDU)); +- is_last = !!(rxd->msdu_end.common.info0 & ++ is_last = !!(rxd_msdu_end_common->info0 & + __cpu_to_le32(RX_MSDU_END_INFO0_LAST_MSDU)); + is_amsdu = !(is_first && is_last); + +@@ -1574,6 +1647,7 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, + const u8 first_hdr[64], + enum htt_rx_mpdu_encrypt_type enctype) + { ++ struct ath10k_hw_params *hw = &ar->hw_params; + struct ieee80211_hdr *hdr; + struct ethhdr *eth; + size_t hdr_len; +@@ -1593,8 +1667,10 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, + if (WARN_ON_ONCE(!rfc1042)) + return; + +- rxd = (void *)msdu->data - sizeof(*rxd); +- l3_pad_bytes = ath10k_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd); ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, ++ (void *)msdu->data - hw->rx_desc_ops->rx_desc_size); ++ ++ l3_pad_bytes = ath10k_htt_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd); + skb_put(msdu, l3_pad_bytes); + skb_pull(msdu, l3_pad_bytes); + +@@ -1635,6 +1711,7 @@ static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar, + const u8 first_hdr[64], + enum htt_rx_mpdu_encrypt_type enctype) + { ++ struct ath10k_hw_params *hw = &ar->hw_params; + struct ieee80211_hdr *hdr; + size_t hdr_len; + int l3_pad_bytes; +@@ -1647,8 +1724,10 @@ static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar, + * [payload] + */ + +- rxd = (void *)msdu->data - sizeof(*rxd); +- l3_pad_bytes = ath10k_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd); ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, ++ (void *)msdu->data - hw->rx_desc_ops->rx_desc_size); ++ ++ 
l3_pad_bytes = ath10k_htt_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd); + + skb_put(msdu, l3_pad_bytes); + skb_pull(msdu, sizeof(struct amsdu_subframe_hdr) + l3_pad_bytes); +@@ -1673,7 +1752,9 @@ static void ath10k_htt_rx_h_undecap(struct ath10k *ar, + enum htt_rx_mpdu_encrypt_type enctype, + bool is_decrypted) + { ++ struct ath10k_hw_params *hw = &ar->hw_params; + struct htt_rx_desc *rxd; ++ struct rx_msdu_start_common *rxd_msdu_start_common; + enum rx_msdu_decap_format decap; + + /* First msdu's decapped header: +@@ -1687,8 +1768,11 @@ static void ath10k_htt_rx_h_undecap(struct ath10k *ar, + * [rfc1042/llc] + */ + +- rxd = (void *)msdu->data - sizeof(*rxd); +- decap = MS(__le32_to_cpu(rxd->msdu_start.common.info1), ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, ++ (void *)msdu->data - hw->rx_desc_ops->rx_desc_size); ++ ++ rxd_msdu_start_common = ath10k_htt_rx_desc_get_msdu_start(hw, rxd); ++ decap = MS(__le32_to_cpu(rxd_msdu_start_common->info1), + RX_MSDU_START_INFO1_DECAP_FORMAT); + + switch (decap) { +@@ -1710,17 +1794,23 @@ static void ath10k_htt_rx_h_undecap(struct ath10k *ar, + } + } + +-static int ath10k_htt_rx_get_csum_state(struct sk_buff *skb) ++static int ath10k_htt_rx_get_csum_state(struct ath10k_hw_params *hw, struct sk_buff *skb) + { + struct htt_rx_desc *rxd; ++ struct rx_attention *rxd_attention; ++ struct rx_msdu_start_common *rxd_msdu_start_common; + u32 flags, info; + bool is_ip4, is_ip6; + bool is_tcp, is_udp; + bool ip_csum_ok, tcpudp_csum_ok; + +- rxd = (void *)skb->data - sizeof(*rxd); +- flags = __le32_to_cpu(rxd->attention.flags); +- info = __le32_to_cpu(rxd->msdu_start.common.info1); ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, ++ (void *)skb->data - hw->rx_desc_ops->rx_desc_size); ++ ++ rxd_attention = ath10k_htt_rx_desc_get_attention(hw, rxd); ++ rxd_msdu_start_common = ath10k_htt_rx_desc_get_msdu_start(hw, rxd); ++ flags = __le32_to_cpu(rxd_attention->flags); ++ info = __le32_to_cpu(rxd_msdu_start_common->info1); + + is_ip4 = !!(info & RX_MSDU_START_INFO1_IPV4_PROTO); + is_ip6 = !!(info & RX_MSDU_START_INFO1_IPV6_PROTO); +@@ -1741,9 +1831,10 @@ static int ath10k_htt_rx_get_csum_state(struct sk_buff *skb) + return CHECKSUM_UNNECESSARY; + } + +-static void ath10k_htt_rx_h_csum_offload(struct sk_buff *msdu) ++static void ath10k_htt_rx_h_csum_offload(struct ath10k_hw_params *hw, ++ struct sk_buff *msdu) + { +- msdu->ip_summed = ath10k_htt_rx_get_csum_state(msdu); ++ msdu->ip_summed = ath10k_htt_rx_get_csum_state(hw, msdu); + } + + static u64 ath10k_htt_rx_h_get_pn(struct ath10k *ar, struct sk_buff *skb, +@@ -1835,7 +1926,11 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, + struct sk_buff *first; + struct sk_buff *last; + struct sk_buff *msdu, *temp; ++ struct ath10k_hw_params *hw = &ar->hw_params; + struct htt_rx_desc *rxd; ++ struct rx_attention *rxd_attention; ++ struct rx_mpdu_start *rxd_mpdu_start; ++ + struct ieee80211_hdr *hdr; + enum htt_rx_mpdu_encrypt_type enctype; + u8 first_hdr[64]; +@@ -1853,18 +1948,22 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, + return; + + first = skb_peek(amsdu); +- rxd = (void *)first->data - sizeof(*rxd); ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, ++ (void *)first->data - hw->rx_desc_ops->rx_desc_size); + +- is_mgmt = !!(rxd->attention.flags & ++ rxd_attention = ath10k_htt_rx_desc_get_attention(hw, rxd); ++ rxd_mpdu_start = ath10k_htt_rx_desc_get_mpdu_start(hw, rxd); ++ ++ is_mgmt = !!(rxd_attention->flags & + __cpu_to_le32(RX_ATTENTION_FLAGS_MGMT_TYPE)); + +- enctype = MS(__le32_to_cpu(rxd->mpdu_start.info0), ++ enctype = 
MS(__le32_to_cpu(rxd_mpdu_start->info0), + RX_MPDU_START_INFO0_ENCRYPT_TYPE); + + /* First MSDU's Rx descriptor in an A-MSDU contains full 802.11 + * decapped header. It'll be used for undecapping of each MSDU. + */ +- hdr = (void *)rxd->rx_hdr_status; ++ hdr = (void *)ath10k_htt_rx_desc_get_rx_hdr_status(hw, rxd); + memcpy(first_hdr, hdr, RX_HTT_HDR_STATUS_LEN); + + if (rx_hdr) +@@ -1882,8 +1981,11 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, + + /* Some attention flags are valid only in the last MSDU. */ + last = skb_peek_tail(amsdu); +- rxd = (void *)last->data - sizeof(*rxd); +- attention = __le32_to_cpu(rxd->attention.flags); ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, ++ (void *)last->data - hw->rx_desc_ops->rx_desc_size); ++ ++ rxd_attention = ath10k_htt_rx_desc_get_attention(hw, rxd); ++ attention = __le32_to_cpu(rxd_attention->flags); + + has_fcs_err = !!(attention & RX_ATTENTION_FLAGS_FCS_ERR); + has_crypto_err = !!(attention & RX_ATTENTION_FLAGS_DECRYPT_ERR); +@@ -1971,7 +2073,7 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, + continue; + } + +- ath10k_htt_rx_h_csum_offload(msdu); ++ ath10k_htt_rx_h_csum_offload(&ar->hw_params, msdu); + + if (frag && !fill_crypt_header && + enctype == HTT_RX_MPDU_ENCRYPT_TKIP_WPA) +@@ -2083,12 +2185,19 @@ static void ath10k_htt_rx_h_unchain(struct ath10k *ar, + unsigned long *unchain_cnt) + { + struct sk_buff *first; ++ struct ath10k_hw_params *hw = &ar->hw_params; + struct htt_rx_desc *rxd; ++ struct rx_msdu_start_common *rxd_msdu_start_common; ++ struct rx_frag_info_common *rxd_frag_info; + enum rx_msdu_decap_format decap; + + first = skb_peek(amsdu); +- rxd = (void *)first->data - sizeof(*rxd); +- decap = MS(__le32_to_cpu(rxd->msdu_start.common.info1), ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, ++ (void *)first->data - hw->rx_desc_ops->rx_desc_size); ++ ++ rxd_msdu_start_common = ath10k_htt_rx_desc_get_msdu_start(hw, rxd); ++ rxd_frag_info = ath10k_htt_rx_desc_get_frag_info(hw, rxd); ++ decap = MS(__le32_to_cpu(rxd_msdu_start_common->info1), + RX_MSDU_START_INFO1_DECAP_FORMAT); + + /* FIXME: Current unchaining logic can only handle simple case of raw +@@ -2097,7 +2206,7 @@ static void ath10k_htt_rx_h_unchain(struct ath10k *ar, + * try re-constructing such frames - it'll be pretty much garbage. 
+ */ + if (decap != RX_MSDU_DECAP_RAW || +- skb_queue_len(amsdu) != 1 + rxd->frag_info.ring2_more_count) { ++ skb_queue_len(amsdu) != 1 + rxd_frag_info->ring2_more_count) { + *drop_cnt += skb_queue_len(amsdu); + __skb_queue_purge(amsdu); + return; +@@ -2112,7 +2221,10 @@ static bool ath10k_htt_rx_validate_amsdu(struct ath10k *ar, + u8 *subframe_hdr; + struct sk_buff *first; + bool is_first, is_last; ++ struct ath10k_hw_params *hw = &ar->hw_params; + struct htt_rx_desc *rxd; ++ struct rx_msdu_end_common *rxd_msdu_end_common; ++ struct rx_mpdu_start *rxd_mpdu_start; + struct ieee80211_hdr *hdr; + size_t hdr_len, crypto_len; + enum htt_rx_mpdu_encrypt_type enctype; +@@ -2120,12 +2232,16 @@ static bool ath10k_htt_rx_validate_amsdu(struct ath10k *ar, + + first = skb_peek(amsdu); + +- rxd = (void *)first->data - sizeof(*rxd); +- hdr = (void *)rxd->rx_hdr_status; ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, ++ (void *)first->data - hw->rx_desc_ops->rx_desc_size); ++ ++ rxd_msdu_end_common = ath10k_htt_rx_desc_get_msdu_end(hw, rxd); ++ rxd_mpdu_start = ath10k_htt_rx_desc_get_mpdu_start(hw, rxd); ++ hdr = (void *)ath10k_htt_rx_desc_get_rx_hdr_status(hw, rxd); + +- is_first = !!(rxd->msdu_end.common.info0 & ++ is_first = !!(rxd_msdu_end_common->info0 & + __cpu_to_le32(RX_MSDU_END_INFO0_FIRST_MSDU)); +- is_last = !!(rxd->msdu_end.common.info0 & ++ is_last = !!(rxd_msdu_end_common->info0 & + __cpu_to_le32(RX_MSDU_END_INFO0_LAST_MSDU)); + + /* Return in case of non-aggregated msdu */ +@@ -2136,7 +2252,7 @@ static bool ath10k_htt_rx_validate_amsdu(struct ath10k *ar, + if (!is_first) + return false; + +- enctype = MS(__le32_to_cpu(rxd->mpdu_start.info0), ++ enctype = MS(__le32_to_cpu(rxd_mpdu_start->info0), + RX_MPDU_START_INFO0_ENCRYPT_TYPE); + + hdr_len = ieee80211_hdrlen(hdr->frame_control); +@@ -3028,11 +3144,13 @@ static void ath10k_htt_rx_delba(struct ath10k *ar, struct htt_resp *resp) + spin_unlock_bh(&ar->data_lock); + } + +-static int ath10k_htt_rx_extract_amsdu(struct sk_buff_head *list, ++static int ath10k_htt_rx_extract_amsdu(struct ath10k_hw_params *hw, ++ struct sk_buff_head *list, + struct sk_buff_head *amsdu) + { + struct sk_buff *msdu; + struct htt_rx_desc *rxd; ++ struct rx_msdu_end_common *rxd_msdu_end_common; + + if (skb_queue_empty(list)) + return -ENOBUFS; +@@ -3043,15 +3161,22 @@ static int ath10k_htt_rx_extract_amsdu(struct sk_buff_head *list, + while ((msdu = __skb_dequeue(list))) { + __skb_queue_tail(amsdu, msdu); + +- rxd = (void *)msdu->data - sizeof(*rxd); +- if (rxd->msdu_end.common.info0 & ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, ++ (void *)msdu->data - ++ hw->rx_desc_ops->rx_desc_size); ++ ++ rxd_msdu_end_common = ath10k_htt_rx_desc_get_msdu_end(hw, rxd); ++ if (rxd_msdu_end_common->info0 & + __cpu_to_le32(RX_MSDU_END_INFO0_LAST_MSDU)) + break; + } + + msdu = skb_peek_tail(amsdu); +- rxd = (void *)msdu->data - sizeof(*rxd); +- if (!(rxd->msdu_end.common.info0 & ++ rxd = HTT_RX_BUF_TO_RX_DESC(hw, ++ (void *)msdu->data - hw->rx_desc_ops->rx_desc_size); ++ ++ rxd_msdu_end_common = ath10k_htt_rx_desc_get_msdu_end(hw, rxd); ++ if (!(rxd_msdu_end_common->info0 & + __cpu_to_le32(RX_MSDU_END_INFO0_LAST_MSDU))) { + skb_queue_splice_init(amsdu, list); + return -EAGAIN; +@@ -3194,7 +3319,7 @@ static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) + + while (!skb_queue_empty(&list)) { + __skb_queue_head_init(&amsdu); +- ret = ath10k_htt_rx_extract_amsdu(&list, &amsdu); ++ ret = ath10k_htt_rx_extract_amsdu(&ar->hw_params, &list, &amsdu); + switch (ret) { + case 0: + /* Note: The 
in-order indication may report interleaved +diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c +index b793eac2cfac..9842a4b2f78f 100644 +--- a/drivers/net/wireless/ath/ath10k/htt_tx.c ++++ b/drivers/net/wireless/ath/ath10k/htt_tx.c +@@ -796,47 +796,26 @@ static int ath10k_htt_send_frag_desc_bank_cfg_64(struct ath10k_htt *htt) + return 0; + } + +-static void ath10k_htt_fill_rx_desc_offset_32(void *rx_ring) ++static void ath10k_htt_fill_rx_desc_offset_32(struct ath10k_hw_params *hw, void *rx_ring) + { + struct htt_rx_ring_setup_ring32 *ring = + (struct htt_rx_ring_setup_ring32 *)rx_ring; + +-#define desc_offset(x) (offsetof(struct htt_rx_desc, x) / 4) +- ring->mac80211_hdr_offset = __cpu_to_le16(desc_offset(rx_hdr_status)); +- ring->msdu_payload_offset = __cpu_to_le16(desc_offset(msdu_payload)); +- ring->ppdu_start_offset = __cpu_to_le16(desc_offset(ppdu_start)); +- ring->ppdu_end_offset = __cpu_to_le16(desc_offset(ppdu_end)); +- ring->mpdu_start_offset = __cpu_to_le16(desc_offset(mpdu_start)); +- ring->mpdu_end_offset = __cpu_to_le16(desc_offset(mpdu_end)); +- ring->msdu_start_offset = __cpu_to_le16(desc_offset(msdu_start)); +- ring->msdu_end_offset = __cpu_to_le16(desc_offset(msdu_end)); +- ring->rx_attention_offset = __cpu_to_le16(desc_offset(attention)); +- ring->frag_info_offset = __cpu_to_le16(desc_offset(frag_info)); +-#undef desc_offset ++ ath10k_htt_rx_desc_get_offsets(hw, &ring->offsets); + } + +-static void ath10k_htt_fill_rx_desc_offset_64(void *rx_ring) ++static void ath10k_htt_fill_rx_desc_offset_64(struct ath10k_hw_params *hw, void *rx_ring) + { + struct htt_rx_ring_setup_ring64 *ring = + (struct htt_rx_ring_setup_ring64 *)rx_ring; + +-#define desc_offset(x) (offsetof(struct htt_rx_desc, x) / 4) +- ring->mac80211_hdr_offset = __cpu_to_le16(desc_offset(rx_hdr_status)); +- ring->msdu_payload_offset = __cpu_to_le16(desc_offset(msdu_payload)); +- ring->ppdu_start_offset = __cpu_to_le16(desc_offset(ppdu_start)); +- ring->ppdu_end_offset = __cpu_to_le16(desc_offset(ppdu_end)); +- ring->mpdu_start_offset = __cpu_to_le16(desc_offset(mpdu_start)); +- ring->mpdu_end_offset = __cpu_to_le16(desc_offset(mpdu_end)); +- ring->msdu_start_offset = __cpu_to_le16(desc_offset(msdu_start)); +- ring->msdu_end_offset = __cpu_to_le16(desc_offset(msdu_end)); +- ring->rx_attention_offset = __cpu_to_le16(desc_offset(attention)); +- ring->frag_info_offset = __cpu_to_le16(desc_offset(frag_info)); +-#undef desc_offset ++ ath10k_htt_rx_desc_get_offsets(hw, &ring->offsets); + } + + static int ath10k_htt_send_rx_ring_cfg_32(struct ath10k_htt *htt) + { + struct ath10k *ar = htt->ar; ++ struct ath10k_hw_params *hw = &ar->hw_params; + struct sk_buff *skb; + struct htt_cmd *cmd; + struct htt_rx_ring_setup_ring32 *ring; +@@ -896,7 +875,7 @@ static int ath10k_htt_send_rx_ring_cfg_32(struct ath10k_htt *htt) + ring->flags = __cpu_to_le16(flags); + ring->fw_idx_init_val = __cpu_to_le16(fw_idx); + +- ath10k_htt_fill_rx_desc_offset_32(ring); ++ ath10k_htt_fill_rx_desc_offset_32(hw, ring); + ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); + if (ret) { + dev_kfree_skb_any(skb); +@@ -909,6 +888,7 @@ static int ath10k_htt_send_rx_ring_cfg_32(struct ath10k_htt *htt) + static int ath10k_htt_send_rx_ring_cfg_64(struct ath10k_htt *htt) + { + struct ath10k *ar = htt->ar; ++ struct ath10k_hw_params *hw = &ar->hw_params; + struct sk_buff *skb; + struct htt_cmd *cmd; + struct htt_rx_ring_setup_ring64 *ring; +@@ -965,7 +945,7 @@ static int ath10k_htt_send_rx_ring_cfg_64(struct 
ath10k_htt *htt) + ring->flags = __cpu_to_le16(flags); + ring->fw_idx_init_val = __cpu_to_le16(fw_idx); + +- ath10k_htt_fill_rx_desc_offset_64(ring); ++ ath10k_htt_fill_rx_desc_offset_64(hw, ring); + ret = ath10k_htc_send(&htt->ar->htc, htt->eid, skb); + if (ret) { + dev_kfree_skb_any(skb); +diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c +index 57c58af64a57..e52e41a70321 100644 +--- a/drivers/net/wireless/ath/ath10k/hw.c ++++ b/drivers/net/wireless/ath/ath10k/hw.c +@@ -11,6 +11,7 @@ + #include "hif.h" + #include "wmi-ops.h" + #include "bmi.h" ++#include "rx_desc.h" + + const struct ath10k_hw_regs qca988x_regs = { + .rtc_soc_base_address = 0x00004000, +@@ -1134,21 +1135,7 @@ const struct ath10k_hw_ops qca988x_ops = { + .is_rssi_enable = ath10k_htt_tx_rssi_enable, + }; + +-static int ath10k_qca99x0_rx_desc_get_l3_pad_bytes(struct htt_rx_desc *rxd) +-{ +- return MS(__le32_to_cpu(rxd->msdu_end.qca99x0.info1), +- RX_MSDU_END_INFO1_L3_HDR_PAD); +-} +- +-static bool ath10k_qca99x0_rx_desc_msdu_limit_error(struct htt_rx_desc *rxd) +-{ +- return !!(rxd->msdu_end.common.info0 & +- __cpu_to_le32(RX_MSDU_END_INFO0_MSDU_LIMIT_ERR)); +-} +- + const struct ath10k_hw_ops qca99x0_ops = { +- .rx_desc_get_l3_pad_bytes = ath10k_qca99x0_rx_desc_get_l3_pad_bytes, +- .rx_desc_get_msdu_limit_error = ath10k_qca99x0_rx_desc_msdu_limit_error, + .is_rssi_enable = ath10k_htt_tx_rssi_enable, + }; + +diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h +index 591ef7416b61..5215a6816d71 100644 +--- a/drivers/net/wireless/ath/ath10k/hw.h ++++ b/drivers/net/wireless/ath/ath10k/hw.h +@@ -510,6 +510,8 @@ struct ath10k_hw_clk_params { + u32 outdiv; + }; + ++struct htt_rx_desc_ops; ++ + struct ath10k_hw_params { + u32 id; + u16 dev_id; +@@ -562,6 +564,9 @@ struct ath10k_hw_params { + */ + bool sw_decrypt_mcast_mgmt; + ++ /* Rx descriptor abstraction */ ++ const struct ath10k_htt_rx_desc_ops *rx_desc_ops; ++ + const struct ath10k_hw_ops *hw_ops; + + /* Number of bytes used for alignment in rx_hdr_status of rx desc. 
*/ +@@ -630,16 +635,14 @@ struct ath10k_hw_params { + bool dynamic_sar_support; + }; + +-struct htt_rx_desc; + struct htt_resp; + struct htt_data_tx_completion_ext; ++struct htt_rx_ring_rx_desc_offsets; + + /* Defines needed for Rx descriptor abstraction */ + struct ath10k_hw_ops { +- int (*rx_desc_get_l3_pad_bytes)(struct htt_rx_desc *rxd); + void (*set_coverage_class)(struct ath10k *ar, s16 value); + int (*enable_pll_clk)(struct ath10k *ar); +- bool (*rx_desc_get_msdu_limit_error)(struct htt_rx_desc *rxd); + int (*tx_data_rssi_pad_bytes)(struct htt_resp *htt); + int (*is_rssi_enable)(struct htt_resp *resp); + }; +@@ -652,24 +655,6 @@ extern const struct ath10k_hw_ops wcn3990_ops; + + extern const struct ath10k_hw_clk_params qca6174_clk[]; + +-static inline int +-ath10k_rx_desc_get_l3_pad_bytes(struct ath10k_hw_params *hw, +- struct htt_rx_desc *rxd) +-{ +- if (hw->hw_ops->rx_desc_get_l3_pad_bytes) +- return hw->hw_ops->rx_desc_get_l3_pad_bytes(rxd); +- return 0; +-} +- +-static inline bool +-ath10k_rx_desc_msdu_limit_error(struct ath10k_hw_params *hw, +- struct htt_rx_desc *rxd) +-{ +- if (hw->hw_ops->rx_desc_get_msdu_limit_error) +- return hw->hw_ops->rx_desc_get_msdu_limit_error(rxd); +- return false; +-} +- + static inline int + ath10k_tx_data_rssi_get_pad_bytes(struct ath10k_hw_params *hw, + struct htt_resp *htt) +diff --git a/drivers/net/wireless/ath/ath10k/rx_desc.h b/drivers/net/wireless/ath/ath10k/rx_desc.h +index 705b6295e466..6ce2a8b1060d 100644 +--- a/drivers/net/wireless/ath/ath10k/rx_desc.h ++++ b/drivers/net/wireless/ath/ath10k/rx_desc.h +@@ -196,17 +196,31 @@ struct rx_attention { + * descriptor. + */ + +-struct rx_frag_info { ++struct rx_frag_info_common { + u8 ring0_more_count; + u8 ring1_more_count; + u8 ring2_more_count; + u8 ring3_more_count; ++} __packed; ++ ++struct rx_frag_info_wcn3990 { + u8 ring4_more_count; + u8 ring5_more_count; + u8 ring6_more_count; + u8 ring7_more_count; + } __packed; + ++struct rx_frag_info { ++ struct rx_frag_info_common common; ++ union { ++ struct rx_frag_info_wcn3990 wcn3990; ++ } __packed; ++} __packed; ++ ++struct rx_frag_info_v1 { ++ struct rx_frag_info_common common; ++} __packed; ++ + /* + * ring0_more_count + * Indicates the number of more buffers associated with RX DMA +@@ -474,11 +488,17 @@ struct rx_msdu_start_wcn3990 { + struct rx_msdu_start { + struct rx_msdu_start_common common; + union { +- struct rx_msdu_start_qca99x0 qca99x0; + struct rx_msdu_start_wcn3990 wcn3990; + } __packed; + } __packed; + ++struct rx_msdu_start_v1 { ++ struct rx_msdu_start_common common; ++ union { ++ struct rx_msdu_start_qca99x0 qca99x0; ++ } __packed; ++} __packed; ++ + /* + * msdu_length + * MSDU length in bytes after decapsulation. 
This field is
+@@ -612,11 +632,17 @@ struct rx_msdu_end_wcn3990 {
+ struct rx_msdu_end {
+ 	struct rx_msdu_end_common common;
+ 	union {
+-		struct rx_msdu_end_qca99x0 qca99x0;
+ 		struct rx_msdu_end_wcn3990 wcn3990;
+ 	} __packed;
+ } __packed;
+
++struct rx_msdu_end_v1 {
++	struct rx_msdu_end_common common;
++	union {
++		struct rx_msdu_end_qca99x0 qca99x0;
++	} __packed;
++} __packed;
++
+ /*
+  *ip_hdr_chksum
+  *		This can include the IP header checksum or the pseudo header
+@@ -1134,13 +1160,19 @@ struct rx_ppdu_end_wcn3990 {
+ } __packed;
+
+ struct rx_ppdu_end {
++	struct rx_ppdu_end_common common;
++	union {
++		struct rx_ppdu_end_wcn3990 wcn3990;
++	} __packed;
++} __packed;
++
++struct rx_ppdu_end_v1 {
+ 	struct rx_ppdu_end_common common;
+ 	union {
+ 		struct rx_ppdu_end_qca988x qca988x;
+ 		struct rx_ppdu_end_qca6174 qca6174;
+ 		struct rx_ppdu_end_qca99x0 qca99x0;
+ 		struct rx_ppdu_end_qca9984 qca9984;
+-		struct rx_ppdu_end_wcn3990 wcn3990;
+ 	} __packed;
+ } __packed;
+
+--
+2.35.3
+
diff --git a/patches.suse/ath10k-fix-pointer-arithmetic-error-in-trace-call.patch b/patches.suse/ath10k-fix-pointer-arithmetic-error-in-trace-call.patch
new file mode 100644
index 0000000..723bc50
--- /dev/null
+++ b/patches.suse/ath10k-fix-pointer-arithmetic-error-in-trace-call.patch
@@ -0,0 +1,67 @@
+From 49ffac5907a8ff30c2cfc6ff9d56fe5c81abb059 Mon Sep 17 00:00:00 2001
+From: Francesco Magliocca
+Date: Mon, 21 Feb 2022 13:26:38 +0100
+Subject: [PATCH] ath10k: fix pointer arithmetic error in trace call
+Git-commit: 49ffac5907a8ff30c2cfc6ff9d56fe5c81abb059
+Patch-mainline: v5.18-rc1
+References: git-fixes
+
+Reading through the commit history, it looks like
+there is no special reason why we must skip the first 4 bytes
+in this trace call:
+
+trace_ath10k_htt_rx_desc(ar, (void*)rx_desc + sizeof(u32),
+			 hw->rx_desc_ops->rx_desc_size - sizeof(u32));
+
+found in the function ath10k_htt_rx_amsdu_pop in the file htt_rx.c.
+
+I think the original author
+(who is also the one who added rx_desc tracing capabilities
+in a0883cf7e75a) just wanted to trace the rx_desc contents,
+ignoring the fw_rx_desc_base info field
+(which is the part being skipped over).
+But the trace_ath10k_htt_rx_desc calls added later
+don't skip it, so it may be good
+to make this call uniform with the others in the file.
+However, this would change the trace output and
+thus may be a problem for tools that rely on it.
+Therefore I propose, until further discussion,
+to keep it as it is and just fix the pointer arithmetic bug.
+
+Add the missing void* cast to the rx descriptor pointer in order to
+properly skip the initial 4 bytes of the rx descriptor
+when passing it to the trace_ath10k_htt_rx_desc trace function.
+
+This fixes the pointer arithmetic error detected
+by Dan Carpenter's static analysis tool.
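+
+A minimal sketch of the scaling rule at fault (illustrative variables,
+not driver code):
+
+	struct htt_rx_desc *rx_desc = buf;
+
+	/* scaled by the pointee size: advances 4 * sizeof(*rx_desc) bytes */
+	u8 *wrong = (u8 *)(rx_desc + sizeof(u32));
+
+	/* byte-granular (GNU C void * arithmetic): advances exactly 4 bytes */
+	u8 *right = (u8 *)((void *)rx_desc + sizeof(u32));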
+ +Fixes: 6bae9de622d3 ("ath10k: abstract htt_rx_desc structure") + +Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00157-QCARMSWPZ-1 + +Signed-off-by: Francesco Magliocca +Link: https://lore.kernel.org/ath10k/20220201130900.GD22458@kili/ +Signed-off-by: Kalle Valo +Link: https://lore.kernel.org/r/20220221122638.7971-1-franciman12@gmail.com +Acked-by: Takashi Iwai + +--- + drivers/net/wireless/ath/ath10k/htt_rx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c +index 9ad64ca84beb..771252dd6d4e 100644 +--- a/drivers/net/wireless/ath/ath10k/htt_rx.c ++++ b/drivers/net/wireless/ath/ath10k/htt_rx.c +@@ -429,7 +429,7 @@ static int ath10k_htt_rx_amsdu_pop(struct ath10k_htt *htt, + RX_MSDU_END_INFO0_LAST_MSDU; + + /* FIXME: why are we skipping the first part of the rx_desc? */ +- trace_ath10k_htt_rx_desc(ar, rx_desc + sizeof(u32), ++ trace_ath10k_htt_rx_desc(ar, (void *)rx_desc + sizeof(u32), + hw->rx_desc_ops->rx_desc_size - sizeof(u32)); + + if (last_msdu) +-- +2.35.3 + diff --git a/patches.suse/bcache-add-error-handling-support-for-add_disk.patch b/patches.suse/bcache-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..35c69fa --- /dev/null +++ b/patches.suse/bcache-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,80 @@ +From: Luis Chamberlain +Date: Fri, 15 Oct 2021 16:30:23 -0700 +Subject: [PATCH] bcache: add error handling support for add_disk() +Git-commit: 2961c3bbcaec0ed7fb7b9a465b3796f37f2294e5 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. + +This driver doesn't do any unwinding with blk_cleanup_disk() +even on errors after add_disk() and so we follow that +tradition. 
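+
+A minimal sketch of the pattern the hunks below adopt (variable names
+as in bcache):
+
+	err = add_disk(d->disk);
+	if (err)
+		goto err;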
+ +Acked-by: Coly Li +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20211015233028.2167651-5-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/md/bcache/super.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index dc35f6e1d8d3..84a48eed8e24 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -1080,7 +1080,9 @@ int bch_cached_dev_run(struct cached_dev *dc) + closure_sync(&cl); + } + +- add_disk(d->disk); ++ ret = add_disk(d->disk); ++ if (ret) ++ goto out; + bd_link_disk_holder(dc->bdev, dc->disk.disk); + /* + * won't show up in the uevent file, use udevadm monitor -e instead +@@ -1526,10 +1528,11 @@ static void flash_dev_flush(struct closure *cl) + + static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) + { ++ int err = -ENOMEM; + struct bcache_device *d = kzalloc(sizeof(struct bcache_device), + GFP_KERNEL); + if (!d) +- return -ENOMEM; ++ goto err_ret; + + closure_init(&d->cl, NULL); + set_closure_fn(&d->cl, flash_dev_flush, system_wq); +@@ -1543,9 +1546,12 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) + bcache_device_attach(d, c, u - c->uuids); + bch_sectors_dirty_init(d); + bch_flash_dev_request_init(d); +- add_disk(d->disk); ++ err = add_disk(d->disk); ++ if (err) ++ goto err; + +- if (kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache")) ++ err = kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache"); ++ if (err) + goto err; + + bcache_device_link(d, c, "volume"); +@@ -1559,7 +1565,8 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) + return 0; + err: + kobject_put(&d->kobj); +- return -ENOMEM; ++err_ret: ++ return err; + } + + static int flash_devs_run(struct cache_set *c) +-- +2.35.3 + diff --git a/patches.suse/bcache-remove-bdev_sectors.patch b/patches.suse/bcache-remove-bdev_sectors.patch new file mode 100644 index 0000000..8dd4ff8 --- /dev/null +++ b/patches.suse/bcache-remove-bdev_sectors.patch @@ -0,0 +1,64 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:03 +0200 +Subject: [PATCH] bcache: remove bdev_sectors +Git-commit: cda25b82c47496f2da0785af5a0aa72a8990cec2 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the equivalent block layer helper instead. 
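+
+A representative conversion, as done by the hunks below (sketch):
+
+	/* before: bcache-local helper reading bd_inode->i_size */
+	sectors += bdev_sectors(dc->bdev);
+
+	/* after: equivalent generic block layer helper */
+	sectors += bdev_nr_sectors(dc->bdev);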
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Reviewed-by: Chaitanya Kulkarni +Acked-by: Coly Li +Link: https://lore.kernel.org/r/20211018101130.1838532-4-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/md/bcache/super.c | 2 +- + drivers/md/bcache/util.h | 4 ---- + drivers/md/bcache/writeback.c | 2 +- + 3 files changed, 2 insertions(+), 6 deletions(-) + +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index f2874c77ff79..4f89985abe4b 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -1002,7 +1002,7 @@ static void calc_cached_dev_sectors(struct cache_set *c) + struct cached_dev *dc; + + list_for_each_entry(dc, &c->cached_devs, list) +- sectors += bdev_sectors(dc->bdev); ++ sectors += bdev_nr_sectors(dc->bdev); + + c->cached_dev_sectors = sectors; + } +diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h +index b64460a76267..a7da7930a7fd 100644 +--- a/drivers/md/bcache/util.h ++++ b/drivers/md/bcache/util.h +@@ -584,8 +584,4 @@ static inline unsigned int fract_exp_two(unsigned int x, + void bch_bio_map(struct bio *bio, void *base); + int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp_mask); + +-static inline sector_t bdev_sectors(struct block_device *bdev) +-{ +- return bdev->bd_inode->i_size >> 9; +-} + #endif /* _BCACHE_UTIL_H */ +diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c +index 8120da278161..c7560f66dca8 100644 +--- a/drivers/md/bcache/writeback.c ++++ b/drivers/md/bcache/writeback.c +@@ -45,7 +45,7 @@ static uint64_t __calc_target_rate(struct cached_dev *dc) + * backing volume uses about 2% of the cache for dirty data. + */ + uint32_t bdev_share = +- div64_u64(bdev_sectors(dc->bdev) << WRITEBACK_SHARE_SHIFT, ++ div64_u64(bdev_nr_sectors(dc->bdev) << WRITEBACK_SHARE_SHIFT, + c->cached_dev_sectors); + + uint64_t cache_dirty_target = +-- +2.35.3 + diff --git a/patches.suse/blk-cgroup-set-blkg-iostat-after-percpu-stat-aggrega.patch b/patches.suse/blk-cgroup-set-blkg-iostat-after-percpu-stat-aggrega.patch index 929b471..d37e477 100644 --- a/patches.suse/blk-cgroup-set-blkg-iostat-after-percpu-stat-aggrega.patch +++ b/patches.suse/blk-cgroup-set-blkg-iostat-after-percpu-stat-aggrega.patch @@ -23,7 +23,7 @@ Acked-by: Jan Kara --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -855,11 +855,11 @@ static void blkcg_fill_root_iostats(void - blk_queue_root_blkg(bdev->bd_disk->queue); + blk_queue_root_blkg(bdev_get_queue(bdev)); struct blkg_iostat tmp; int cpu; + unsigned long flags; diff --git a/patches.suse/blk-cgroup-synchronize-blkg-creation-against-policy-.patch b/patches.suse/blk-cgroup-synchronize-blkg-creation-against-policy-.patch index 1bea5fc..c03999c 100644 --- a/patches.suse/blk-cgroup-synchronize-blkg-creation-against-policy-.patch +++ b/patches.suse/blk-cgroup-synchronize-blkg-creation-against-policy-.patch @@ -129,7 +129,7 @@ index 8e4dcf6036f6..8d9041e0f4be 100644 +++ b/block/blk-cgroup.c @@ -634,6 +634,14 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, - q = bdev->bd_disk->queue; + q = bdev_get_queue(bdev); + /* + * blkcg_deactivate_policy() requires queue to be frozen, we can grab diff --git a/patches.suse/blk-crypto-fallback-properly-prefix-function-and-str.patch b/patches.suse/blk-crypto-fallback-properly-prefix-function-and-str.patch new file mode 100644 index 0000000..6aef0c8 --- /dev/null +++ b/patches.suse/blk-crypto-fallback-properly-prefix-function-and-str.patch @@ -0,0 +1,209 @@ +From: Eric Biggers +Date: 
Mon, 18 Oct 2021 11:04:50 -0700 +Subject: [PATCH] blk-crypto-fallback: properly prefix function and struct + names +Git-commit: eebcafaebb17cb8fda671709fab5dd836bdc3a08 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +For clarity, avoid using just the "blk_crypto_" prefix for functions and +structs that are specific to blk-crypto-fallback. Instead, use +"blk_crypto_fallback_". Some places already did this, but others +didn't. + +This is also a prerequisite for using "struct blk_crypto_keyslot" to +mean a generic blk-crypto keyslot (which is what it sounds like). +Rename the fallback one to "struct blk_crypto_fallback_keyslot". + +No change in behavior. + +Reviewed-by: Christoph Hellwig +Reviewed-by: Chaitanya Kulkarni +Reviewed-by: Mike Snitzer +Reviewed-by: Martin K. Petersen +Signed-off-by: Eric Biggers +Link: https://lore.kernel.org/r/20211018180453.40441-2-ebiggers@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-crypto-fallback.c | 59 +++++++++++++++++++------------------ + 1 file changed, 30 insertions(+), 29 deletions(-) + +diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c +index ec4c7823541c..1bcc1a151424 100644 +--- a/block/blk-crypto-fallback.c ++++ b/block/blk-crypto-fallback.c +@@ -73,7 +73,7 @@ static mempool_t *bio_fallback_crypt_ctx_pool; + static DEFINE_MUTEX(tfms_init_lock); + static bool tfms_inited[BLK_ENCRYPTION_MODE_MAX]; + +-static struct blk_crypto_keyslot { ++static struct blk_crypto_fallback_keyslot { + enum blk_crypto_mode_num crypto_mode; + struct crypto_skcipher *tfms[BLK_ENCRYPTION_MODE_MAX]; + } *blk_crypto_keyslots; +@@ -89,9 +89,9 @@ static struct bio_set crypto_bio_split; + */ + static u8 blank_key[BLK_CRYPTO_MAX_KEY_SIZE]; + +-static void blk_crypto_evict_keyslot(unsigned int slot) ++static void blk_crypto_fallback_evict_keyslot(unsigned int slot) + { +- struct blk_crypto_keyslot *slotp = &blk_crypto_keyslots[slot]; ++ struct blk_crypto_fallback_keyslot *slotp = &blk_crypto_keyslots[slot]; + enum blk_crypto_mode_num crypto_mode = slotp->crypto_mode; + int err; + +@@ -104,34 +104,34 @@ static void blk_crypto_evict_keyslot(unsigned int slot) + slotp->crypto_mode = BLK_ENCRYPTION_MODE_INVALID; + } + +-static int blk_crypto_keyslot_program(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key, +- unsigned int slot) ++static int blk_crypto_fallback_keyslot_program(struct blk_keyslot_manager *ksm, ++ const struct blk_crypto_key *key, ++ unsigned int slot) + { +- struct blk_crypto_keyslot *slotp = &blk_crypto_keyslots[slot]; ++ struct blk_crypto_fallback_keyslot *slotp = &blk_crypto_keyslots[slot]; + const enum blk_crypto_mode_num crypto_mode = + key->crypto_cfg.crypto_mode; + int err; + + if (crypto_mode != slotp->crypto_mode && + slotp->crypto_mode != BLK_ENCRYPTION_MODE_INVALID) +- blk_crypto_evict_keyslot(slot); ++ blk_crypto_fallback_evict_keyslot(slot); + + slotp->crypto_mode = crypto_mode; + err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], key->raw, + key->size); + if (err) { +- blk_crypto_evict_keyslot(slot); ++ blk_crypto_fallback_evict_keyslot(slot); + return err; + } + return 0; + } + +-static int blk_crypto_keyslot_evict(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key, +- unsigned int slot) ++static int blk_crypto_fallback_keyslot_evict(struct blk_keyslot_manager *ksm, ++ const struct blk_crypto_key *key, ++ unsigned int slot) + { +- blk_crypto_evict_keyslot(slot); ++ blk_crypto_fallback_evict_keyslot(slot); + return 0; + } + +@@ -141,8 +141,8 @@ 
static int blk_crypto_keyslot_evict(struct blk_keyslot_manager *ksm, + * hardware. + */ + static const struct blk_ksm_ll_ops blk_crypto_ksm_ll_ops = { +- .keyslot_program = blk_crypto_keyslot_program, +- .keyslot_evict = blk_crypto_keyslot_evict, ++ .keyslot_program = blk_crypto_fallback_keyslot_program, ++ .keyslot_evict = blk_crypto_fallback_keyslot_evict, + }; + + static void blk_crypto_fallback_encrypt_endio(struct bio *enc_bio) +@@ -160,7 +160,7 @@ static void blk_crypto_fallback_encrypt_endio(struct bio *enc_bio) + bio_endio(src_bio); + } + +-static struct bio *blk_crypto_clone_bio(struct bio *bio_src) ++static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src) + { + struct bvec_iter iter; + struct bio_vec bv; +@@ -187,12 +187,13 @@ static struct bio *blk_crypto_clone_bio(struct bio *bio_src) + return bio; + } + +-static bool blk_crypto_alloc_cipher_req(struct blk_ksm_keyslot *slot, +- struct skcipher_request **ciph_req_ret, +- struct crypto_wait *wait) ++static bool ++blk_crypto_fallback_alloc_cipher_req(struct blk_ksm_keyslot *slot, ++ struct skcipher_request **ciph_req_ret, ++ struct crypto_wait *wait) + { + struct skcipher_request *ciph_req; +- const struct blk_crypto_keyslot *slotp; ++ const struct blk_crypto_fallback_keyslot *slotp; + int keyslot_idx = blk_ksm_get_slot_idx(slot); + + slotp = &blk_crypto_keyslots[keyslot_idx]; +@@ -210,7 +211,7 @@ static bool blk_crypto_alloc_cipher_req(struct blk_ksm_keyslot *slot, + return true; + } + +-static bool blk_crypto_split_bio_if_needed(struct bio **bio_ptr) ++static bool blk_crypto_fallback_split_bio_if_needed(struct bio **bio_ptr) + { + struct bio *bio = *bio_ptr; + unsigned int i = 0; +@@ -277,7 +278,7 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr) + blk_status_t blk_st; + + /* Split the bio if it's too big for single page bvec */ +- if (!blk_crypto_split_bio_if_needed(bio_ptr)) ++ if (!blk_crypto_fallback_split_bio_if_needed(bio_ptr)) + return false; + + src_bio = *bio_ptr; +@@ -285,7 +286,7 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr) + data_unit_size = bc->bc_key->crypto_cfg.data_unit_size; + + /* Allocate bounce bio for encryption */ +- enc_bio = blk_crypto_clone_bio(src_bio); ++ enc_bio = blk_crypto_fallback_clone_bio(src_bio); + if (!enc_bio) { + src_bio->bi_status = BLK_STS_RESOURCE; + return false; +@@ -302,7 +303,7 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr) + } + + /* and then allocate an skcipher_request for it */ +- if (!blk_crypto_alloc_cipher_req(slot, &ciph_req, &wait)) { ++ if (!blk_crypto_fallback_alloc_cipher_req(slot, &ciph_req, &wait)) { + src_bio->bi_status = BLK_STS_RESOURCE; + goto out_release_keyslot; + } +@@ -404,7 +405,7 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work) + } + + /* and then allocate an skcipher_request for it */ +- if (!blk_crypto_alloc_cipher_req(slot, &ciph_req, &wait)) { ++ if (!blk_crypto_fallback_alloc_cipher_req(slot, &ciph_req, &wait)) { + bio->bi_status = BLK_STS_RESOURCE; + goto out; + } +@@ -474,9 +475,9 @@ static void blk_crypto_fallback_decrypt_endio(struct bio *bio) + * @bio_ptr: pointer to the bio to prepare + * + * If bio is doing a WRITE operation, this splits the bio into two parts if it's +- * too big (see blk_crypto_split_bio_if_needed). It then allocates a bounce bio +- * for the first part, encrypts it, and update bio_ptr to point to the bounce +- * bio. ++ * too big (see blk_crypto_fallback_split_bio_if_needed()). 
It then allocates a ++ * bounce bio for the first part, encrypts it, and updates bio_ptr to point to ++ * the bounce bio. + * + * For a READ operation, we mark the bio for decryption by using bi_private and + * bi_end_io. +@@ -611,7 +612,7 @@ static int blk_crypto_fallback_init(void) + int blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num) + { + const char *cipher_str = blk_crypto_modes[mode_num].cipher_str; +- struct blk_crypto_keyslot *slotp; ++ struct blk_crypto_fallback_keyslot *slotp; + unsigned int i; + int err = 0; + +-- +2.35.3 + diff --git a/patches.suse/blk-crypto-rename-blk_keyslot_manager-to-blk_crypto_.patch b/patches.suse/blk-crypto-rename-blk_keyslot_manager-to-blk_crypto_.patch new file mode 100644 index 0000000..654f887 --- /dev/null +++ b/patches.suse/blk-crypto-rename-blk_keyslot_manager-to-blk_crypto_.patch @@ -0,0 +1,2150 @@ +From: Eric Biggers +Date: Mon, 18 Oct 2021 11:04:52 -0700 +Subject: [PATCH] blk-crypto: rename blk_keyslot_manager to blk_crypto_profile +Git-commit: cb77cb5abe1f4fae4a33b735606aae22f9eaa1c7 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +blk_keyslot_manager is misnamed because it doesn't necessarily manage +keyslots. It actually does several different things: + + - Contains the crypto capabilities of the device. + + - Provides functions to control the inline encryption hardware. + Originally these were just for programming/evicting keyslots; + however, new functionality (hardware-wrapped keys) will require new + functions here which are unrelated to keyslots. Moreover, + device-mapper devices already (ab)use "keyslot_evict" to pass key + eviction requests to their underlying devices even though + device-mapper devices don't have any keyslots themselves (so it + really should be "evict_key", not "keyslot_evict"). + + - Sometimes (but not always!) it manages keyslots. Originally it + always did, but device-mapper devices don't have keyslots + themselves, so they use a "passthrough keyslot manager" which + doesn't actually manage keyslots. This hack works, but the + terminology is unnatural. Also, some hardware doesn't have keyslots + and thus also uses a "passthrough keyslot manager" (support for such + hardware is yet to be upstreamed, but it will happen eventually). + +Let's stop having keyslot managers which don't actually manage keyslots. +Instead, rename blk_keyslot_manager to blk_crypto_profile. + +This is a fairly big change, since for consistency it also has to update +keyslot manager-related function names, variable names, and comments -- +not just the actual struct name. However it's still a fairly +straightforward change, as it doesn't change any actual functionality. + +Acked-by: Ulf Hansson # For MMC +Reviewed-by: Mike Snitzer +Reviewed-by: Martin K. 
Petersen +Signed-off-by: Eric Biggers +Link: https://lore.kernel.org/r/20211018180453.40441-4-ebiggers@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-crypto-fallback.c | 71 ++-- + block/blk-crypto-profile.c | 520 ++++++++++++++--------------- + block/blk-crypto.c | 27 +- + block/blk-integrity.c | 4 +- + drivers/md/dm-core.h | 2 +- + drivers/md/dm-table.c | 168 +++++----- + drivers/md/dm.c | 8 +- + drivers/mmc/core/crypto.c | 11 +- + drivers/mmc/host/cqhci-crypto.c | 31 +- + drivers/scsi/ufs/ufshcd-crypto.c | 32 +- + drivers/scsi/ufs/ufshcd-crypto.h | 9 +- + drivers/scsi/ufs/ufshcd.c | 2 +- + drivers/scsi/ufs/ufshcd.h | 4 +- + include/linux/blk-crypto-profile.h | 164 +++++---- + include/linux/blk-mq.h | 2 +- + include/linux/blkdev.h | 16 +- + include/linux/device-mapper.h | 4 +- + include/linux/mmc/host.h | 2 +- + 18 files changed, 555 insertions(+), 522 deletions(-) + +diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c +index 08bfea292c75..c87aba8584c6 100644 +--- a/block/blk-crypto-fallback.c ++++ b/block/blk-crypto-fallback.c +@@ -78,7 +78,7 @@ static struct blk_crypto_fallback_keyslot { + struct crypto_skcipher *tfms[BLK_ENCRYPTION_MODE_MAX]; + } *blk_crypto_keyslots; + +-static struct blk_keyslot_manager blk_crypto_ksm; ++static struct blk_crypto_profile blk_crypto_fallback_profile; + static struct workqueue_struct *blk_crypto_wq; + static mempool_t *blk_crypto_bounce_page_pool; + static struct bio_set crypto_bio_split; +@@ -104,9 +104,10 @@ static void blk_crypto_fallback_evict_keyslot(unsigned int slot) + slotp->crypto_mode = BLK_ENCRYPTION_MODE_INVALID; + } + +-static int blk_crypto_fallback_keyslot_program(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key, +- unsigned int slot) ++static int ++blk_crypto_fallback_keyslot_program(struct blk_crypto_profile *profile, ++ const struct blk_crypto_key *key, ++ unsigned int slot) + { + struct blk_crypto_fallback_keyslot *slotp = &blk_crypto_keyslots[slot]; + const enum blk_crypto_mode_num crypto_mode = +@@ -127,7 +128,7 @@ static int blk_crypto_fallback_keyslot_program(struct blk_keyslot_manager *ksm, + return 0; + } + +-static int blk_crypto_fallback_keyslot_evict(struct blk_keyslot_manager *ksm, ++static int blk_crypto_fallback_keyslot_evict(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key, + unsigned int slot) + { +@@ -135,14 +136,9 @@ static int blk_crypto_fallback_keyslot_evict(struct blk_keyslot_manager *ksm, + return 0; + } + +-/* +- * The crypto API fallback KSM ops - only used for a bio when it specifies a +- * blk_crypto_key that was not supported by the device's inline encryption +- * hardware. 
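For readers tracking the rename, the ops table that changes from blk_ksm_ll_ops to blk_crypto_ll_ops in the hunks below is a plain function-pointer vtable that drivers fill in. A minimal stand-alone sketch of that pattern, with demo_program()/demo_evict() as made-up stand-ins for a driver's callbacks (the struct names mirror the patch, but this is an illustration, not the kernel API):

	#include <stdio.h>

	struct blk_crypto_profile;		/* opaque in this sketch */
	struct blk_crypto_key { const char *raw; };

	struct blk_crypto_ll_ops {
		int (*keyslot_program)(struct blk_crypto_profile *profile,
				       const struct blk_crypto_key *key,
				       unsigned int slot);
		int (*keyslot_evict)(struct blk_crypto_profile *profile,
				     const struct blk_crypto_key *key,
				     unsigned int slot);
	};

	static int demo_program(struct blk_crypto_profile *profile,
				const struct blk_crypto_key *key,
				unsigned int slot)
	{
		(void)profile;
		printf("program key %s into slot %u\n", key->raw, slot);
		return 0;
	}

	static int demo_evict(struct blk_crypto_profile *profile,
			      const struct blk_crypto_key *key,
			      unsigned int slot)
	{
		(void)profile;
		(void)key;
		printf("evict slot %u\n", slot);
		return 0;
	}

	static const struct blk_crypto_ll_ops demo_ops = {
		.keyslot_program = demo_program,
		.keyslot_evict   = demo_evict,
	};

	int main(void)
	{
		struct blk_crypto_key k = { .raw = "k0" };

		demo_ops.keyslot_program(NULL, &k, 3);
		demo_ops.keyslot_evict(NULL, &k, 3);
		return 0;
	}

The rename changes only which struct owns these pointers and what the first argument is called; the call sites keep the same shape, which is why the diff below is large but mechanical.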
+- */ +-static const struct blk_ksm_ll_ops blk_crypto_ksm_ll_ops = { +- .keyslot_program = blk_crypto_fallback_keyslot_program, +- .keyslot_evict = blk_crypto_fallback_keyslot_evict, ++static const struct blk_crypto_ll_ops blk_crypto_fallback_ll_ops = { ++ .keyslot_program = blk_crypto_fallback_keyslot_program, ++ .keyslot_evict = blk_crypto_fallback_keyslot_evict, + }; + + static void blk_crypto_fallback_encrypt_endio(struct bio *enc_bio) +@@ -188,13 +184,13 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src) + } + + static bool +-blk_crypto_fallback_alloc_cipher_req(struct blk_ksm_keyslot *slot, ++blk_crypto_fallback_alloc_cipher_req(struct blk_crypto_keyslot *slot, + struct skcipher_request **ciph_req_ret, + struct crypto_wait *wait) + { + struct skcipher_request *ciph_req; + const struct blk_crypto_fallback_keyslot *slotp; +- int keyslot_idx = blk_ksm_get_slot_idx(slot); ++ int keyslot_idx = blk_crypto_keyslot_index(slot); + + slotp = &blk_crypto_keyslots[keyslot_idx]; + ciph_req = skcipher_request_alloc(slotp->tfms[slotp->crypto_mode], +@@ -266,7 +262,7 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr) + { + struct bio *src_bio, *enc_bio; + struct bio_crypt_ctx *bc; +- struct blk_ksm_keyslot *slot; ++ struct blk_crypto_keyslot *slot; + int data_unit_size; + struct skcipher_request *ciph_req = NULL; + DECLARE_CRYPTO_WAIT(wait); +@@ -293,10 +289,11 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr) + } + + /* +- * Use the crypto API fallback keyslot manager to get a crypto_skcipher +- * for the algorithm and key specified for this bio. ++ * Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for ++ * this bio's algorithm and key. + */ +- blk_st = blk_ksm_get_slot_for_key(&blk_crypto_ksm, bc->bc_key, &slot); ++ blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile, ++ bc->bc_key, &slot); + if (blk_st != BLK_STS_OK) { + src_bio->bi_status = blk_st; + goto out_put_enc_bio; +@@ -364,7 +361,7 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr) + out_free_ciph_req: + skcipher_request_free(ciph_req); + out_release_keyslot: +- blk_ksm_put_slot(slot); ++ blk_crypto_put_keyslot(slot); + out_put_enc_bio: + if (enc_bio) + bio_put(enc_bio); +@@ -382,7 +379,7 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work) + container_of(work, struct bio_fallback_crypt_ctx, work); + struct bio *bio = f_ctx->bio; + struct bio_crypt_ctx *bc = &f_ctx->crypt_ctx; +- struct blk_ksm_keyslot *slot; ++ struct blk_crypto_keyslot *slot; + struct skcipher_request *ciph_req = NULL; + DECLARE_CRYPTO_WAIT(wait); + u64 curr_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; +@@ -395,10 +392,11 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work) + blk_status_t blk_st; + + /* +- * Use the crypto API fallback keyslot manager to get a crypto_skcipher +- * for the algorithm and key specified for this bio. ++ * Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for ++ * this bio's algorithm and key. 
+ */ +- blk_st = blk_ksm_get_slot_for_key(&blk_crypto_ksm, bc->bc_key, &slot); ++ blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile, ++ bc->bc_key, &slot); + if (blk_st != BLK_STS_OK) { + bio->bi_status = blk_st; + goto out_no_keyslot; +@@ -436,7 +434,7 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work) + + out: + skcipher_request_free(ciph_req); +- blk_ksm_put_slot(slot); ++ blk_crypto_put_keyslot(slot); + out_no_keyslot: + mempool_free(f_ctx, bio_fallback_crypt_ctx_pool); + bio_endio(bio); +@@ -501,8 +499,8 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr) + return false; + } + +- if (!blk_ksm_crypto_cfg_supported(&blk_crypto_ksm, +- &bc->bc_key->crypto_cfg)) { ++ if (!__blk_crypto_cfg_supported(&blk_crypto_fallback_profile, ++ &bc->bc_key->crypto_cfg)) { + bio->bi_status = BLK_STS_NOTSUPP; + return false; + } +@@ -528,7 +526,7 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr) + + int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key) + { +- return blk_ksm_evict_key(&blk_crypto_ksm, key); ++ return __blk_crypto_evict_key(&blk_crypto_fallback_profile, key); + } + + static bool blk_crypto_fallback_inited; +@@ -536,6 +534,7 @@ static int blk_crypto_fallback_init(void) + { + int i; + int err; ++ struct blk_crypto_profile *profile = &blk_crypto_fallback_profile; + + if (blk_crypto_fallback_inited) + return 0; +@@ -546,24 +545,24 @@ static int blk_crypto_fallback_init(void) + if (err) + goto out; + +- err = blk_ksm_init(&blk_crypto_ksm, blk_crypto_num_keyslots); ++ err = blk_crypto_profile_init(profile, blk_crypto_num_keyslots); + if (err) + goto fail_free_bioset; + err = -ENOMEM; + +- blk_crypto_ksm.ksm_ll_ops = blk_crypto_ksm_ll_ops; +- blk_crypto_ksm.max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE; ++ profile->ll_ops = blk_crypto_fallback_ll_ops; ++ profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE; + + /* All blk-crypto modes have a crypto API fallback. */ + for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++) +- blk_crypto_ksm.crypto_modes_supported[i] = 0xFFFFFFFF; +- blk_crypto_ksm.crypto_modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0; ++ profile->modes_supported[i] = 0xFFFFFFFF; ++ profile->modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0; + + blk_crypto_wq = alloc_workqueue("blk_crypto_wq", + WQ_UNBOUND | WQ_HIGHPRI | + WQ_MEM_RECLAIM, num_online_cpus()); + if (!blk_crypto_wq) +- goto fail_free_ksm; ++ goto fail_destroy_profile; + + blk_crypto_keyslots = kcalloc(blk_crypto_num_keyslots, + sizeof(blk_crypto_keyslots[0]), +@@ -597,8 +596,8 @@ static int blk_crypto_fallback_init(void) + kfree(blk_crypto_keyslots); + fail_free_wq: + destroy_workqueue(blk_crypto_wq); +-fail_free_ksm: +- blk_ksm_destroy(&blk_crypto_ksm); ++fail_destroy_profile: ++ blk_crypto_profile_destroy(profile); + fail_free_bioset: + bioset_exit(&crypto_bio_split); + out: +diff --git a/block/blk-crypto-profile.c b/block/blk-crypto-profile.c +index 1a235fa3c3e8..605ba0626a5c 100644 +--- a/block/blk-crypto-profile.c ++++ b/block/blk-crypto-profile.c +@@ -4,26 +4,22 @@ + */ + + /** +- * DOC: The Keyslot Manager ++ * DOC: blk-crypto profiles + * +- * Many devices with inline encryption support have a limited number of "slots" +- * into which encryption contexts may be programmed, and requests can be tagged +- * with a slot number to specify the key to use for en/decryption. ++ * 'struct blk_crypto_profile' contains all generic inline encryption-related ++ * state for a particular inline encryption device. 
blk_crypto_profile serves ++ * as the way that drivers for inline encryption hardware expose their crypto ++ * capabilities and certain functions (e.g., functions to program and evict ++ * keys) to upper layers. Device drivers that want to support inline encryption ++ * construct a crypto profile, then associate it with the disk's request_queue. + * +- * As the number of slots is limited, and programming keys is expensive on +- * many inline encryption hardware, we don't want to program the same key into +- * multiple slots - if multiple requests are using the same key, we want to +- * program just one slot with that key and use that slot for all requests. ++ * If the device has keyslots, then its blk_crypto_profile also handles managing ++ * these keyslots in a device-independent way, using the driver-provided ++ * functions to program and evict keys as needed. This includes keeping track ++ * of which key and how many I/O requests are using each keyslot, getting ++ * keyslots for I/O requests, and handling key eviction requests. + * +- * The keyslot manager manages these keyslots appropriately, and also acts as +- * an abstraction between the inline encryption hardware and the upper layers. +- * +- * Lower layer devices will set up a keyslot manager in their request queue +- * and tell it how to perform device specific operations like programming/ +- * evicting keys from keyslots. +- * +- * Upper layers will call blk_ksm_get_slot_for_key() to program a +- * key into some slot in the inline encryption hardware. ++ * For more information, see Documentation/block/inline-encryption.rst. + */ + + #define pr_fmt(fmt) "blk-crypto: " fmt +@@ -37,77 +33,75 @@ + #include + #include + +-struct blk_ksm_keyslot { ++struct blk_crypto_keyslot { + atomic_t slot_refs; + struct list_head idle_slot_node; + struct hlist_node hash_node; + const struct blk_crypto_key *key; +- struct blk_keyslot_manager *ksm; ++ struct blk_crypto_profile *profile; + }; + +-static inline void blk_ksm_hw_enter(struct blk_keyslot_manager *ksm) ++static inline void blk_crypto_hw_enter(struct blk_crypto_profile *profile) + { + /* +- * Calling into the driver requires ksm->lock held and the device ++ * Calling into the driver requires profile->lock held and the device + * resumed. But we must resume the device first, since that can acquire +- * and release ksm->lock via blk_ksm_reprogram_all_keys(). ++ * and release profile->lock via blk_crypto_reprogram_all_keys(). + */ +- if (ksm->dev) +- pm_runtime_get_sync(ksm->dev); +- down_write(&ksm->lock); ++ if (profile->dev) ++ pm_runtime_get_sync(profile->dev); ++ down_write(&profile->lock); + } + +-static inline void blk_ksm_hw_exit(struct blk_keyslot_manager *ksm) ++static inline void blk_crypto_hw_exit(struct blk_crypto_profile *profile) + { +- up_write(&ksm->lock); +- if (ksm->dev) +- pm_runtime_put_sync(ksm->dev); +-} +- +-static inline bool blk_ksm_is_passthrough(struct blk_keyslot_manager *ksm) +-{ +- return ksm->num_slots == 0; ++ up_write(&profile->lock); ++ if (profile->dev) ++ pm_runtime_put_sync(profile->dev); + } + + /** +- * blk_ksm_init() - Initialize a keyslot manager +- * @ksm: The keyslot_manager to initialize. +- * @num_slots: The number of key slots to manage. ++ * blk_crypto_profile_init() - Initialize a blk_crypto_profile ++ * @profile: the blk_crypto_profile to initialize ++ * @num_slots: the number of keyslots + * +- * Allocate memory for keyslots and initialize a keyslot manager. Called by +- * e.g. 
storage drivers to set up a keyslot manager in their request_queue. ++ * Storage drivers must call this when starting to set up a blk_crypto_profile, ++ * before filling in additional fields. + * + * Return: 0 on success, or else a negative error code. + */ +-int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots) ++int blk_crypto_profile_init(struct blk_crypto_profile *profile, ++ unsigned int num_slots) + { + unsigned int slot; + unsigned int i; + unsigned int slot_hashtable_size; + +- memset(ksm, 0, sizeof(*ksm)); ++ memset(profile, 0, sizeof(*profile)); ++ init_rwsem(&profile->lock); + + if (num_slots == 0) +- return -EINVAL; ++ return 0; + +- ksm->slots = kvcalloc(num_slots, sizeof(ksm->slots[0]), GFP_KERNEL); +- if (!ksm->slots) +- return -ENOMEM; ++ /* Initialize keyslot management data. */ + +- ksm->num_slots = num_slots; ++ profile->slots = kvcalloc(num_slots, sizeof(profile->slots[0]), ++ GFP_KERNEL); ++ if (!profile->slots) ++ return -ENOMEM; + +- init_rwsem(&ksm->lock); ++ profile->num_slots = num_slots; + +- init_waitqueue_head(&ksm->idle_slots_wait_queue); +- INIT_LIST_HEAD(&ksm->idle_slots); ++ init_waitqueue_head(&profile->idle_slots_wait_queue); ++ INIT_LIST_HEAD(&profile->idle_slots); + + for (slot = 0; slot < num_slots; slot++) { +- ksm->slots[slot].ksm = ksm; +- list_add_tail(&ksm->slots[slot].idle_slot_node, +- &ksm->idle_slots); ++ profile->slots[slot].profile = profile; ++ list_add_tail(&profile->slots[slot].idle_slot_node, ++ &profile->idle_slots); + } + +- spin_lock_init(&ksm->idle_slots_lock); ++ spin_lock_init(&profile->idle_slots_lock); + + slot_hashtable_size = roundup_pow_of_two(num_slots); + /* +@@ -117,74 +111,80 @@ int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots) + if (slot_hashtable_size < 2) + slot_hashtable_size = 2; + +- ksm->log_slot_ht_size = ilog2(slot_hashtable_size); +- ksm->slot_hashtable = kvmalloc_array(slot_hashtable_size, +- sizeof(ksm->slot_hashtable[0]), +- GFP_KERNEL); +- if (!ksm->slot_hashtable) +- goto err_destroy_ksm; ++ profile->log_slot_ht_size = ilog2(slot_hashtable_size); ++ profile->slot_hashtable = ++ kvmalloc_array(slot_hashtable_size, ++ sizeof(profile->slot_hashtable[0]), GFP_KERNEL); ++ if (!profile->slot_hashtable) ++ goto err_destroy; + for (i = 0; i < slot_hashtable_size; i++) +- INIT_HLIST_HEAD(&ksm->slot_hashtable[i]); ++ INIT_HLIST_HEAD(&profile->slot_hashtable[i]); + + return 0; + +-err_destroy_ksm: +- blk_ksm_destroy(ksm); ++err_destroy: ++ blk_crypto_profile_destroy(profile); + return -ENOMEM; + } +-EXPORT_SYMBOL_GPL(blk_ksm_init); ++EXPORT_SYMBOL_GPL(blk_crypto_profile_init); + +-static void blk_ksm_destroy_callback(void *ksm) ++static void blk_crypto_profile_destroy_callback(void *profile) + { +- blk_ksm_destroy(ksm); ++ blk_crypto_profile_destroy(profile); + } + + /** +- * devm_blk_ksm_init() - Resource-managed blk_ksm_init() +- * @dev: The device which owns the blk_keyslot_manager. +- * @ksm: The blk_keyslot_manager to initialize. +- * @num_slots: The number of key slots to manage. ++ * devm_blk_crypto_profile_init() - Resource-managed blk_crypto_profile_init() ++ * @dev: the device which owns the blk_crypto_profile ++ * @profile: the blk_crypto_profile to initialize ++ * @num_slots: the number of keyslots + * +- * Like blk_ksm_init(), but causes blk_ksm_destroy() to be called automatically +- * on driver detach. ++ * Like blk_crypto_profile_init(), but causes blk_crypto_profile_destroy() to be ++ * called automatically on driver detach. 
+ * + * Return: 0 on success, or else a negative error code. + */ +-int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm, +- unsigned int num_slots) ++int devm_blk_crypto_profile_init(struct device *dev, ++ struct blk_crypto_profile *profile, ++ unsigned int num_slots) + { +- int err = blk_ksm_init(ksm, num_slots); ++ int err = blk_crypto_profile_init(profile, num_slots); + + if (err) + return err; + +- return devm_add_action_or_reset(dev, blk_ksm_destroy_callback, ksm); ++ return devm_add_action_or_reset(dev, ++ blk_crypto_profile_destroy_callback, ++ profile); + } +-EXPORT_SYMBOL_GPL(devm_blk_ksm_init); ++EXPORT_SYMBOL_GPL(devm_blk_crypto_profile_init); + + static inline struct hlist_head * +-blk_ksm_hash_bucket_for_key(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key) ++blk_crypto_hash_bucket_for_key(struct blk_crypto_profile *profile, ++ const struct blk_crypto_key *key) + { +- return &ksm->slot_hashtable[hash_ptr(key, ksm->log_slot_ht_size)]; ++ return &profile->slot_hashtable[ ++ hash_ptr(key, profile->log_slot_ht_size)]; + } + +-static void blk_ksm_remove_slot_from_lru_list(struct blk_ksm_keyslot *slot) ++static void ++blk_crypto_remove_slot_from_lru_list(struct blk_crypto_keyslot *slot) + { +- struct blk_keyslot_manager *ksm = slot->ksm; ++ struct blk_crypto_profile *profile = slot->profile; + unsigned long flags; + +- spin_lock_irqsave(&ksm->idle_slots_lock, flags); ++ spin_lock_irqsave(&profile->idle_slots_lock, flags); + list_del(&slot->idle_slot_node); +- spin_unlock_irqrestore(&ksm->idle_slots_lock, flags); ++ spin_unlock_irqrestore(&profile->idle_slots_lock, flags); + } + +-static struct blk_ksm_keyslot *blk_ksm_find_keyslot( +- struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key) ++static struct blk_crypto_keyslot * ++blk_crypto_find_keyslot(struct blk_crypto_profile *profile, ++ const struct blk_crypto_key *key) + { +- const struct hlist_head *head = blk_ksm_hash_bucket_for_key(ksm, key); +- struct blk_ksm_keyslot *slotp; ++ const struct hlist_head *head = ++ blk_crypto_hash_bucket_for_key(profile, key); ++ struct blk_crypto_keyslot *slotp; + + hlist_for_each_entry(slotp, head, hash_node) { + if (slotp->key == key) +@@ -193,68 +193,79 @@ static struct blk_ksm_keyslot *blk_ksm_find_keyslot( + return NULL; + } + +-static struct blk_ksm_keyslot *blk_ksm_find_and_grab_keyslot( +- struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key) ++static struct blk_crypto_keyslot * ++blk_crypto_find_and_grab_keyslot(struct blk_crypto_profile *profile, ++ const struct blk_crypto_key *key) + { +- struct blk_ksm_keyslot *slot; ++ struct blk_crypto_keyslot *slot; + +- slot = blk_ksm_find_keyslot(ksm, key); ++ slot = blk_crypto_find_keyslot(profile, key); + if (!slot) + return NULL; + if (atomic_inc_return(&slot->slot_refs) == 1) { + /* Took first reference to this slot; remove it from LRU list */ +- blk_ksm_remove_slot_from_lru_list(slot); ++ blk_crypto_remove_slot_from_lru_list(slot); + } + return slot; + } + +-unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot) ++/** ++ * blk_crypto_keyslot_index() - Get the index of a keyslot ++ * @slot: a keyslot that blk_crypto_get_keyslot() returned ++ * ++ * Return: the 0-based index of the keyslot within the device's keyslots. 
++ */ ++unsigned int blk_crypto_keyslot_index(struct blk_crypto_keyslot *slot) + { +- return slot - slot->ksm->slots; ++ return slot - slot->profile->slots; + } +-EXPORT_SYMBOL_GPL(blk_ksm_get_slot_idx); ++EXPORT_SYMBOL_GPL(blk_crypto_keyslot_index); + + /** +- * blk_ksm_get_slot_for_key() - Program a key into a keyslot. +- * @ksm: The keyslot manager to program the key into. +- * @key: Pointer to the key object to program, including the raw key, crypto +- * mode, and data unit size. +- * @slot_ptr: A pointer to return the pointer of the allocated keyslot. ++ * blk_crypto_get_keyslot() - Get a keyslot for a key, if needed. ++ * @profile: the crypto profile of the device the key will be used on ++ * @key: the key that will be used ++ * @slot_ptr: If a keyslot is allocated, an opaque pointer to the keyslot struct ++ * will be stored here; otherwise NULL will be stored here. ++ * ++ * If the device has keyslots, this gets a keyslot that's been programmed with ++ * the specified key. If the key is already in a slot, this reuses it; ++ * otherwise this waits for a slot to become idle and programs the key into it. + * +- * Get a keyslot that's been programmed with the specified key. If one already +- * exists, return it with incremented refcount. Otherwise, wait for a keyslot +- * to become idle and program it. ++ * This must be paired with a call to blk_crypto_put_keyslot(). + * +- * Context: Process context. Takes and releases ksm->lock. +- * Return: BLK_STS_OK on success (and keyslot is set to the pointer of the +- * allocated keyslot), or some other blk_status_t otherwise (and +- * keyslot is set to NULL). ++ * Context: Process context. Takes and releases profile->lock. ++ * Return: BLK_STS_OK on success, meaning that either a keyslot was allocated or ++ * one wasn't needed; or a blk_status_t error on failure. + */ +-blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key, +- struct blk_ksm_keyslot **slot_ptr) ++blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile, ++ const struct blk_crypto_key *key, ++ struct blk_crypto_keyslot **slot_ptr) + { +- struct blk_ksm_keyslot *slot; ++ struct blk_crypto_keyslot *slot; + int slot_idx; + int err; + + *slot_ptr = NULL; + +- if (blk_ksm_is_passthrough(ksm)) ++ /* ++ * If the device has no concept of "keyslots", then there is no need to ++ * get one. ++ */ ++ if (profile->num_slots == 0) + return BLK_STS_OK; + +- down_read(&ksm->lock); +- slot = blk_ksm_find_and_grab_keyslot(ksm, key); +- up_read(&ksm->lock); ++ down_read(&profile->lock); ++ slot = blk_crypto_find_and_grab_keyslot(profile, key); ++ up_read(&profile->lock); + if (slot) + goto success; + + for (;;) { +- blk_ksm_hw_enter(ksm); +- slot = blk_ksm_find_and_grab_keyslot(ksm, key); ++ blk_crypto_hw_enter(profile); ++ slot = blk_crypto_find_and_grab_keyslot(profile, key); + if (slot) { +- blk_ksm_hw_exit(ksm); ++ blk_crypto_hw_exit(profile); + goto success; + } + +@@ -262,22 +273,22 @@ blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm, + * If we're here, that means there wasn't a slot that was + * already programmed with the key. So try to program it. 
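The get/put pairing documented above amounts to refcounted slot reuse: the first reference pulls a slot off the idle list, and dropping the last reference puts it back and wakes any waiters. The single-threaded sketch below models only that bookkeeping; locking, the hash table, and sleeping on idle_slots_wait_queue are deliberately left out, so this is an illustration rather than the kernel logic. Matching slots by key pointer (not by key bytes) follows the code above.

	#include <stdio.h>

	#define NSLOTS 2

	struct slot {
		int refs;
		const char *key;
		int idle;	/* stand-in for membership in the idle list */
	};

	static struct slot slots[NSLOTS] = { { .idle = 1 }, { .idle = 1 } };

	static struct slot *get_keyslot(const char *key)
	{
		int i;

		/* Reuse a slot already programmed with this key. */
		for (i = 0; i < NSLOTS; i++)
			if (slots[i].key == key) {
				if (slots[i].refs++ == 0)
					slots[i].idle = 0;
				return &slots[i];
			}
		/* Otherwise take an idle slot and "program" the key. */
		for (i = 0; i < NSLOTS; i++)
			if (slots[i].idle) {
				slots[i].key = key;
				slots[i].refs = 1;
				slots[i].idle = 0;
				return &slots[i];
			}
		return NULL;	/* the kernel would sleep until a slot idles */
	}

	static void put_keyslot(struct slot *s)
	{
		if (s && --s->refs == 0)
			s->idle = 1;	/* back on the idle list; wake waiters */
	}

	int main(void)
	{
		const char *key = "key-A";
		struct slot *a = get_keyslot(key);
		struct slot *b = get_keyslot(key);	/* reuses a's slot */

		printf("shared: %d, refs: %d\n", a == b, a->refs);
		put_keyslot(b);
		put_keyslot(a);
		printf("idle again: %d\n", a->idle);
		return 0;
	}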
+ */ +- if (!list_empty(&ksm->idle_slots)) ++ if (!list_empty(&profile->idle_slots)) + break; + +- blk_ksm_hw_exit(ksm); +- wait_event(ksm->idle_slots_wait_queue, +- !list_empty(&ksm->idle_slots)); ++ blk_crypto_hw_exit(profile); ++ wait_event(profile->idle_slots_wait_queue, ++ !list_empty(&profile->idle_slots)); + } + +- slot = list_first_entry(&ksm->idle_slots, struct blk_ksm_keyslot, ++ slot = list_first_entry(&profile->idle_slots, struct blk_crypto_keyslot, + idle_slot_node); +- slot_idx = blk_ksm_get_slot_idx(slot); ++ slot_idx = blk_crypto_keyslot_index(slot); + +- err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot_idx); ++ err = profile->ll_ops.keyslot_program(profile, key, slot_idx); + if (err) { +- wake_up(&ksm->idle_slots_wait_queue); +- blk_ksm_hw_exit(ksm); ++ wake_up(&profile->idle_slots_wait_queue); ++ blk_crypto_hw_exit(profile); + return errno_to_blk_status(err); + } + +@@ -285,97 +296,98 @@ blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm, + if (slot->key) + hlist_del(&slot->hash_node); + slot->key = key; +- hlist_add_head(&slot->hash_node, blk_ksm_hash_bucket_for_key(ksm, key)); ++ hlist_add_head(&slot->hash_node, ++ blk_crypto_hash_bucket_for_key(profile, key)); + + atomic_set(&slot->slot_refs, 1); + +- blk_ksm_remove_slot_from_lru_list(slot); ++ blk_crypto_remove_slot_from_lru_list(slot); + +- blk_ksm_hw_exit(ksm); ++ blk_crypto_hw_exit(profile); + success: + *slot_ptr = slot; + return BLK_STS_OK; + } + + /** +- * blk_ksm_put_slot() - Release a reference to a slot +- * @slot: The keyslot to release the reference of. ++ * blk_crypto_put_keyslot() - Release a reference to a keyslot ++ * @slot: The keyslot to release the reference of (may be NULL). + * + * Context: Any context. + */ +-void blk_ksm_put_slot(struct blk_ksm_keyslot *slot) ++void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot) + { +- struct blk_keyslot_manager *ksm; ++ struct blk_crypto_profile *profile; + unsigned long flags; + + if (!slot) + return; + +- ksm = slot->ksm; ++ profile = slot->profile; + + if (atomic_dec_and_lock_irqsave(&slot->slot_refs, +- &ksm->idle_slots_lock, flags)) { +- list_add_tail(&slot->idle_slot_node, &ksm->idle_slots); +- spin_unlock_irqrestore(&ksm->idle_slots_lock, flags); +- wake_up(&ksm->idle_slots_wait_queue); ++ &profile->idle_slots_lock, flags)) { ++ list_add_tail(&slot->idle_slot_node, &profile->idle_slots); ++ spin_unlock_irqrestore(&profile->idle_slots_lock, flags); ++ wake_up(&profile->idle_slots_wait_queue); + } + } + + /** +- * blk_ksm_crypto_cfg_supported() - Find out if a crypto configuration is +- * supported by a ksm. +- * @ksm: The keyslot manager to check +- * @cfg: The crypto configuration to check for. +- * +- * Checks for crypto_mode/data unit size/dun bytes support. ++ * __blk_crypto_cfg_supported() - Check whether the given crypto profile ++ * supports the given crypto configuration. ++ * @profile: the crypto profile to check ++ * @cfg: the crypto configuration to check for + * +- * Return: Whether or not this ksm supports the specified crypto config. ++ * Return: %true if @profile supports the given @cfg. 
+ */ +-bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_config *cfg) ++bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile, ++ const struct blk_crypto_config *cfg) + { +- if (!ksm) ++ if (!profile) + return false; +- if (!(ksm->crypto_modes_supported[cfg->crypto_mode] & +- cfg->data_unit_size)) ++ if (!(profile->modes_supported[cfg->crypto_mode] & cfg->data_unit_size)) + return false; +- if (ksm->max_dun_bytes_supported < cfg->dun_bytes) ++ if (profile->max_dun_bytes_supported < cfg->dun_bytes) + return false; + return true; + } + + /** +- * blk_ksm_evict_key() - Evict a key from the lower layer device. +- * @ksm: The keyslot manager to evict from +- * @key: The key to evict ++ * __blk_crypto_evict_key() - Evict a key from a device. ++ * @profile: the crypto profile of the device ++ * @key: the key to evict. It must not still be used in any I/O. ++ * ++ * If the device has keyslots, this finds the keyslot (if any) that contains the ++ * specified key and calls the driver's keyslot_evict function to evict it. + * +- * Find the keyslot that the specified key was programmed into, and evict that +- * slot from the lower layer device. The slot must not be in use by any +- * in-flight IO when this function is called. ++ * Otherwise, this just calls the driver's keyslot_evict function if it is ++ * implemented, passing just the key (without any particular keyslot). This ++ * allows layered devices to evict the key from their underlying devices. + * +- * Context: Process context. Takes and releases ksm->lock. ++ * Context: Process context. Takes and releases profile->lock. + * Return: 0 on success or if there's no keyslot with the specified key, -EBUSY + * if the keyslot is still in use, or another -errno value on other + * error. + */ +-int blk_ksm_evict_key(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key) ++int __blk_crypto_evict_key(struct blk_crypto_profile *profile, ++ const struct blk_crypto_key *key) + { +- struct blk_ksm_keyslot *slot; ++ struct blk_crypto_keyslot *slot; + int err = 0; + +- if (blk_ksm_is_passthrough(ksm)) { +- if (ksm->ksm_ll_ops.keyslot_evict) { +- blk_ksm_hw_enter(ksm); +- err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, -1); +- blk_ksm_hw_exit(ksm); ++ if (profile->num_slots == 0) { ++ if (profile->ll_ops.keyslot_evict) { ++ blk_crypto_hw_enter(profile); ++ err = profile->ll_ops.keyslot_evict(profile, key, -1); ++ blk_crypto_hw_exit(profile); + return err; + } + return 0; + } + +- blk_ksm_hw_enter(ksm); +- slot = blk_ksm_find_keyslot(ksm, key); ++ blk_crypto_hw_enter(profile); ++ slot = blk_crypto_find_keyslot(profile, key); + if (!slot) + goto out_unlock; + +@@ -383,8 +395,8 @@ int blk_ksm_evict_key(struct blk_keyslot_manager *ksm, + err = -EBUSY; + goto out_unlock; + } +- err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, +- blk_ksm_get_slot_idx(slot)); ++ err = profile->ll_ops.keyslot_evict(profile, key, ++ blk_crypto_keyslot_index(slot)); + if (err) + goto out_unlock; + +@@ -392,81 +404,84 @@ int blk_ksm_evict_key(struct blk_keyslot_manager *ksm, + slot->key = NULL; + err = 0; + out_unlock: +- blk_ksm_hw_exit(ksm); ++ blk_crypto_hw_exit(profile); + return err; + } + + /** +- * blk_ksm_reprogram_all_keys() - Re-program all keyslots. +- * @ksm: The keyslot manager ++ * blk_crypto_reprogram_all_keys() - Re-program all keyslots. ++ * @profile: The crypto profile + * + * Re-program all keyslots that are supposed to have a key programmed. 
This is + * intended only for use by drivers for hardware that loses its keys on reset. + * +- * Context: Process context. Takes and releases ksm->lock. ++ * Context: Process context. Takes and releases profile->lock. + */ +-void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm) ++void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile) + { + unsigned int slot; + +- if (blk_ksm_is_passthrough(ksm)) ++ if (profile->num_slots == 0) + return; + + /* This is for device initialization, so don't resume the device */ +- down_write(&ksm->lock); +- for (slot = 0; slot < ksm->num_slots; slot++) { +- const struct blk_crypto_key *key = ksm->slots[slot].key; ++ down_write(&profile->lock); ++ for (slot = 0; slot < profile->num_slots; slot++) { ++ const struct blk_crypto_key *key = profile->slots[slot].key; + int err; + + if (!key) + continue; + +- err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot); ++ err = profile->ll_ops.keyslot_program(profile, key, slot); + WARN_ON(err); + } +- up_write(&ksm->lock); ++ up_write(&profile->lock); + } +-EXPORT_SYMBOL_GPL(blk_ksm_reprogram_all_keys); ++EXPORT_SYMBOL_GPL(blk_crypto_reprogram_all_keys); + +-void blk_ksm_destroy(struct blk_keyslot_manager *ksm) ++void blk_crypto_profile_destroy(struct blk_crypto_profile *profile) + { +- if (!ksm) ++ if (!profile) + return; +- kvfree(ksm->slot_hashtable); +- kvfree_sensitive(ksm->slots, sizeof(ksm->slots[0]) * ksm->num_slots); +- memzero_explicit(ksm, sizeof(*ksm)); ++ kvfree(profile->slot_hashtable); ++ kvfree_sensitive(profile->slots, ++ sizeof(profile->slots[0]) * profile->num_slots); ++ memzero_explicit(profile, sizeof(*profile)); + } +-EXPORT_SYMBOL_GPL(blk_ksm_destroy); ++EXPORT_SYMBOL_GPL(blk_crypto_profile_destroy); + +-bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q) ++bool blk_crypto_register(struct blk_crypto_profile *profile, ++ struct request_queue *q) + { + if (blk_integrity_queue_supports_integrity(q)) { + pr_warn("Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n"); + return false; + } +- q->ksm = ksm; ++ q->crypto_profile = profile; + return true; + } +-EXPORT_SYMBOL_GPL(blk_ksm_register); ++EXPORT_SYMBOL_GPL(blk_crypto_register); + +-void blk_ksm_unregister(struct request_queue *q) ++void blk_crypto_unregister(struct request_queue *q) + { +- q->ksm = NULL; ++ q->crypto_profile = NULL; + } + + /** +- * blk_ksm_intersect_modes() - restrict supported modes by child device +- * @parent: The keyslot manager for parent device +- * @child: The keyslot manager for child device, or NULL ++ * blk_crypto_intersect_capabilities() - restrict supported crypto capabilities ++ * by child device ++ * @parent: the crypto profile for the parent device ++ * @child: the crypto profile for the child device, or NULL + * +- * Clear any crypto mode support bits in @parent that aren't set in @child. +- * If @child is NULL, then all parent bits are cleared. ++ * This clears all crypto capabilities in @parent that aren't set in @child. If ++ * @child is NULL, then this clears all parent capabilities. + * +- * Only use this when setting up the keyslot manager for a layered device, +- * before it's been exposed yet. ++ * Only use this when setting up the crypto profile for a layered device, before ++ * it's been exposed yet. 
+ */ +-void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent, +- const struct blk_keyslot_manager *child) ++void blk_crypto_intersect_capabilities(struct blk_crypto_profile *parent, ++ const struct blk_crypto_profile *child) + { + if (child) { + unsigned int i; +@@ -474,73 +489,63 @@ void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent, + parent->max_dun_bytes_supported = + min(parent->max_dun_bytes_supported, + child->max_dun_bytes_supported); +- for (i = 0; i < ARRAY_SIZE(child->crypto_modes_supported); +- i++) { +- parent->crypto_modes_supported[i] &= +- child->crypto_modes_supported[i]; +- } ++ for (i = 0; i < ARRAY_SIZE(child->modes_supported); i++) ++ parent->modes_supported[i] &= child->modes_supported[i]; + } else { + parent->max_dun_bytes_supported = 0; +- memset(parent->crypto_modes_supported, 0, +- sizeof(parent->crypto_modes_supported)); ++ memset(parent->modes_supported, 0, ++ sizeof(parent->modes_supported)); + } + } +-EXPORT_SYMBOL_GPL(blk_ksm_intersect_modes); ++EXPORT_SYMBOL_GPL(blk_crypto_intersect_capabilities); + + /** +- * blk_ksm_is_superset() - Check if a KSM supports a superset of crypto modes +- * and DUN bytes that another KSM supports. Here, +- * "superset" refers to the mathematical meaning of the +- * word - i.e. if two KSMs have the *same* capabilities, +- * they *are* considered supersets of each other. +- * @ksm_superset: The KSM that we want to verify is a superset +- * @ksm_subset: The KSM that we want to verify is a subset ++ * blk_crypto_has_capabilities() - Check whether @target supports at least all ++ * the crypto capabilities that @reference does. ++ * @target: the target profile ++ * @reference: the reference profile + * +- * Return: True if @ksm_superset supports a superset of the crypto modes and DUN +- * bytes that @ksm_subset supports. ++ * Return: %true if @target supports all the crypto capabilities of @reference. + */ +-bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset, +- struct blk_keyslot_manager *ksm_subset) ++bool blk_crypto_has_capabilities(const struct blk_crypto_profile *target, ++ const struct blk_crypto_profile *reference) + { + int i; + +- if (!ksm_subset) ++ if (!reference) + return true; + +- if (!ksm_superset) ++ if (!target) + return false; + +- for (i = 0; i < ARRAY_SIZE(ksm_superset->crypto_modes_supported); i++) { +- if (ksm_subset->crypto_modes_supported[i] & +- (~ksm_superset->crypto_modes_supported[i])) { ++ for (i = 0; i < ARRAY_SIZE(target->modes_supported); i++) { ++ if (reference->modes_supported[i] & ~target->modes_supported[i]) + return false; +- } + } + +- if (ksm_subset->max_dun_bytes_supported > +- ksm_superset->max_dun_bytes_supported) { ++ if (reference->max_dun_bytes_supported > ++ target->max_dun_bytes_supported) + return false; +- } + + return true; + } +-EXPORT_SYMBOL_GPL(blk_ksm_is_superset); ++EXPORT_SYMBOL_GPL(blk_crypto_has_capabilities); + + /** +- * blk_ksm_update_capabilities() - Update the restrictions of a KSM to those of +- * another KSM +- * @target_ksm: The KSM whose restrictions to update. +- * @reference_ksm: The KSM to whose restrictions this function will update +- * @target_ksm's restrictions to. ++ * blk_crypto_update_capabilities() - Update the capabilities of a crypto ++ * profile to match those of another crypto ++ * profile. ++ * @dst: The crypto profile whose capabilities to update. ++ * @src: The crypto profile whose capabilities this function will update @dst's ++ * capabilities to. 
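The intersection and superset checks renamed in these hunks reduce to bitmask arithmetic over the per-mode data-unit-size masks plus a DUN-byte comparison. A compilable stand-alone model of both operations (NUM_MODES and the sample masks are arbitrary values chosen for the demo, not kernel constants):

	#include <stdbool.h>
	#include <stdio.h>

	#define NUM_MODES 4	/* stand-in for BLK_ENCRYPTION_MODE_MAX */

	struct caps {
		unsigned int modes_supported[NUM_MODES]; /* data-unit-size masks */
		unsigned int max_dun_bytes_supported;
	};

	/* Mirrors blk_crypto_intersect_capabilities(). */
	static void intersect_capabilities(struct caps *parent,
					   const struct caps *child)
	{
		int i;

		if (!child) {
			parent->max_dun_bytes_supported = 0;
			for (i = 0; i < NUM_MODES; i++)
				parent->modes_supported[i] = 0;
			return;
		}
		if (child->max_dun_bytes_supported <
		    parent->max_dun_bytes_supported)
			parent->max_dun_bytes_supported =
				child->max_dun_bytes_supported;
		for (i = 0; i < NUM_MODES; i++)
			parent->modes_supported[i] &= child->modes_supported[i];
	}

	/* Mirrors blk_crypto_has_capabilities(): true iff target covers
	 * everything reference advertises. */
	static bool has_capabilities(const struct caps *target,
				     const struct caps *ref)
	{
		int i;

		if (!ref)
			return true;
		if (!target)
			return false;
		for (i = 0; i < NUM_MODES; i++)
			if (ref->modes_supported[i] & ~target->modes_supported[i])
				return false;
		return ref->max_dun_bytes_supported <=
		       target->max_dun_bytes_supported;
	}

	int main(void)
	{
		struct caps parent = { { 0xFF, 0xFF, 0xFF, 0xFF }, 16 };
		struct caps child  = { { 0x0F, 0x00, 0xFF, 0x03 }, 8 };

		intersect_capabilities(&parent, &child);
		printf("child still covers parent: %d\n",
		       has_capabilities(&child, &parent));
		return 0;
	}

Intersecting a parent with one of its children can only clear bits, so has_capabilities() on the result is always true; dm-table relies on exactly this invariant when it builds the common profile across targets.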
+ * + * Blk-crypto requires that crypto capabilities that were + * advertised when a bio was created continue to be supported by the + * device until that bio is ended. This is turn means that a device cannot + * shrink its advertised crypto capabilities without any explicit + * synchronization with upper layers. So if there's no such explicit +- * synchronization, @reference_ksm must support all the crypto capabilities that +- * @target_ksm does +- * (i.e. we need blk_ksm_is_superset(@reference_ksm, @target_ksm) == true). ++ * synchronization, @src must support all the crypto capabilities that ++ * @dst does (i.e. we need blk_crypto_has_capabilities(@src, @dst)). + * + * Note also that as long as the crypto capabilities are being expanded, the + * order of updates becoming visible is not important because it's alright +@@ -549,31 +554,12 @@ EXPORT_SYMBOL_GPL(blk_ksm_is_superset); + * might result in blk-crypto-fallback being used if available, or the bio being + * failed). + */ +-void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm, +- struct blk_keyslot_manager *reference_ksm) ++void blk_crypto_update_capabilities(struct blk_crypto_profile *dst, ++ const struct blk_crypto_profile *src) + { +- memcpy(target_ksm->crypto_modes_supported, +- reference_ksm->crypto_modes_supported, +- sizeof(target_ksm->crypto_modes_supported)); ++ memcpy(dst->modes_supported, src->modes_supported, ++ sizeof(dst->modes_supported)); + +- target_ksm->max_dun_bytes_supported = +- reference_ksm->max_dun_bytes_supported; +-} +-EXPORT_SYMBOL_GPL(blk_ksm_update_capabilities); +- +-/** +- * blk_ksm_init_passthrough() - Init a passthrough keyslot manager +- * @ksm: The keyslot manager to init +- * +- * Initialize a passthrough keyslot manager. +- * Called by e.g. storage drivers to set up a keyslot manager in their +- * request_queue, when the storage driver wants to manage its keys by itself. +- * This is useful for inline encryption hardware that doesn't have the concept +- * of keyslots, and for layered devices. +- */ +-void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm) +-{ +- memset(ksm, 0, sizeof(*ksm)); +- init_rwsem(&ksm->lock); ++ dst->max_dun_bytes_supported = src->max_dun_bytes_supported; + } +-EXPORT_SYMBOL_GPL(blk_ksm_init_passthrough); ++EXPORT_SYMBOL_GPL(blk_crypto_update_capabilities); +diff --git a/block/blk-crypto.c b/block/blk-crypto.c +index 76ce7a5d2676..ec9efeeeca91 100644 +--- a/block/blk-crypto.c ++++ b/block/blk-crypto.c +@@ -218,8 +218,9 @@ static bool bio_crypt_check_alignment(struct bio *bio) + + blk_status_t __blk_crypto_init_request(struct request *rq) + { +- return blk_ksm_get_slot_for_key(rq->q->ksm, rq->crypt_ctx->bc_key, +- &rq->crypt_keyslot); ++ return blk_crypto_get_keyslot(rq->q->crypto_profile, ++ rq->crypt_ctx->bc_key, ++ &rq->crypt_keyslot); + } + + /** +@@ -233,7 +234,7 @@ blk_status_t __blk_crypto_init_request(struct request *rq) + */ + void __blk_crypto_free_request(struct request *rq) + { +- blk_ksm_put_slot(rq->crypt_keyslot); ++ blk_crypto_put_keyslot(rq->crypt_keyslot); + mempool_free(rq->crypt_ctx, bio_crypt_ctx_pool); + blk_crypto_rq_set_defaults(rq); + } +@@ -264,6 +265,7 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr) + { + struct bio *bio = *bio_ptr; + const struct blk_crypto_key *bc_key = bio->bi_crypt_context->bc_key; ++ struct blk_crypto_profile *profile; + + /* Error if bio has no data. 
*/ + if (WARN_ON_ONCE(!bio_has_data(bio))) { +@@ -280,8 +282,8 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr) + * Success if device supports the encryption context, or if we succeeded + * in falling back to the crypto API. + */ +- if (blk_ksm_crypto_cfg_supported(bdev_get_queue(bio->bi_bdev)->ksm, +- &bc_key->crypto_cfg)) ++ profile = bdev_get_queue(bio->bi_bdev)->crypto_profile; ++ if (__blk_crypto_cfg_supported(profile, &bc_key->crypto_cfg)) + return true; + + if (blk_crypto_fallback_bio_prep(bio_ptr)) +@@ -357,7 +359,7 @@ bool blk_crypto_config_supported(struct request_queue *q, + const struct blk_crypto_config *cfg) + { + return IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) || +- blk_ksm_crypto_cfg_supported(q->ksm, cfg); ++ __blk_crypto_cfg_supported(q->crypto_profile, cfg); + } + + /** +@@ -378,7 +380,7 @@ bool blk_crypto_config_supported(struct request_queue *q, + int blk_crypto_start_using_key(const struct blk_crypto_key *key, + struct request_queue *q) + { +- if (blk_ksm_crypto_cfg_supported(q->ksm, &key->crypto_cfg)) ++ if (__blk_crypto_cfg_supported(q->crypto_profile, &key->crypto_cfg)) + return 0; + return blk_crypto_fallback_start_using_mode(key->crypto_cfg.crypto_mode); + } +@@ -394,18 +396,17 @@ int blk_crypto_start_using_key(const struct blk_crypto_key *key, + * evicted from any hardware that it might have been programmed into. The key + * must not be in use by any in-flight IO when this function is called. + * +- * Return: 0 on success or if key is not present in the q's ksm, -err on error. ++ * Return: 0 on success or if the key wasn't in any keyslot; -errno on error. + */ + int blk_crypto_evict_key(struct request_queue *q, + const struct blk_crypto_key *key) + { +- if (blk_ksm_crypto_cfg_supported(q->ksm, &key->crypto_cfg)) +- return blk_ksm_evict_key(q->ksm, key); ++ if (__blk_crypto_cfg_supported(q->crypto_profile, &key->crypto_cfg)) ++ return __blk_crypto_evict_key(q->crypto_profile, key); + + /* +- * If the request queue's associated inline encryption hardware didn't +- * have support for the key, then the key might have been programmed +- * into the fallback keyslot manager, so try to evict from there. ++ * If the request_queue didn't support the key, then blk-crypto-fallback ++ * may have been used, so try to evict the key from blk-crypto-fallback. + */ + return blk_crypto_fallback_evict_key(key); + } +diff --git a/block/blk-integrity.c b/block/blk-integrity.c +index cef534a7cbc9..d670d54e5f7a 100644 +--- a/block/blk-integrity.c ++++ b/block/blk-integrity.c +@@ -409,9 +409,9 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); + + #ifdef CONFIG_BLK_INLINE_ENCRYPTION +- if (disk->queue->ksm) { ++ if (disk->queue->crypto_profile) { + pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together. 
Disabling hardware inline encryption.\n"); +- blk_ksm_unregister(disk->queue); ++ blk_crypto_unregister(disk->queue); + } + #endif + } +diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h +index 841ed87999e7..b855fef4f38a 100644 +--- a/drivers/md/dm-core.h ++++ b/drivers/md/dm-core.h +@@ -200,7 +200,7 @@ struct dm_table { + struct dm_md_mempools *mempools; + + #ifdef CONFIG_BLK_INLINE_ENCRYPTION +- struct blk_keyslot_manager *ksm; ++ struct blk_crypto_profile *crypto_profile; + #endif + }; + +diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c +index 1fa4d5582dca..8b0f27a745d9 100644 +--- a/drivers/md/dm-table.c ++++ b/drivers/md/dm-table.c +@@ -170,7 +170,7 @@ static void free_devices(struct list_head *devices, struct mapped_device *md) + } + } + +-static void dm_table_destroy_keyslot_manager(struct dm_table *t); ++static void dm_table_destroy_crypto_profile(struct dm_table *t); + + void dm_table_destroy(struct dm_table *t) + { +@@ -200,7 +200,7 @@ void dm_table_destroy(struct dm_table *t) + + dm_free_md_mempools(t->mempools); + +- dm_table_destroy_keyslot_manager(t); ++ dm_table_destroy_crypto_profile(t); + + kfree(t); + } +@@ -1187,8 +1187,8 @@ static int dm_table_register_integrity(struct dm_table *t) + + #ifdef CONFIG_BLK_INLINE_ENCRYPTION + +-struct dm_keyslot_manager { +- struct blk_keyslot_manager ksm; ++struct dm_crypto_profile { ++ struct blk_crypto_profile profile; + struct mapped_device *md; + }; + +@@ -1214,13 +1214,11 @@ static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev, + * When an inline encryption key is evicted from a device-mapper device, evict + * it from all the underlying devices. + */ +-static int dm_keyslot_evict(struct blk_keyslot_manager *ksm, ++static int dm_keyslot_evict(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key, unsigned int slot) + { +- struct dm_keyslot_manager *dksm = container_of(ksm, +- struct dm_keyslot_manager, +- ksm); +- struct mapped_device *md = dksm->md; ++ struct mapped_device *md = ++ container_of(profile, struct dm_crypto_profile, profile)->md; + struct dm_keyslot_evict_args args = { key }; + struct dm_table *t; + int srcu_idx; +@@ -1240,150 +1238,148 @@ static int dm_keyslot_evict(struct blk_keyslot_manager *ksm, + return args.err; + } + +-static const struct blk_ksm_ll_ops dm_ksm_ll_ops = { +- .keyslot_evict = dm_keyslot_evict, +-}; +- +-static int device_intersect_crypto_modes(struct dm_target *ti, +- struct dm_dev *dev, sector_t start, +- sector_t len, void *data) ++static int ++device_intersect_crypto_capabilities(struct dm_target *ti, struct dm_dev *dev, ++ sector_t start, sector_t len, void *data) + { +- struct blk_keyslot_manager *parent = data; +- struct blk_keyslot_manager *child = bdev_get_queue(dev->bdev)->ksm; ++ struct blk_crypto_profile *parent = data; ++ struct blk_crypto_profile *child = ++ bdev_get_queue(dev->bdev)->crypto_profile; + +- blk_ksm_intersect_modes(parent, child); ++ blk_crypto_intersect_capabilities(parent, child); + return 0; + } + +-void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm) ++void dm_destroy_crypto_profile(struct blk_crypto_profile *profile) + { +- struct dm_keyslot_manager *dksm = container_of(ksm, +- struct dm_keyslot_manager, +- ksm); ++ struct dm_crypto_profile *dmcp = container_of(profile, ++ struct dm_crypto_profile, ++ profile); + +- if (!ksm) ++ if (!profile) + return; + +- blk_ksm_destroy(ksm); +- kfree(dksm); ++ blk_crypto_profile_destroy(profile); ++ kfree(dmcp); + } + +-static void 
dm_table_destroy_keyslot_manager(struct dm_table *t) ++static void dm_table_destroy_crypto_profile(struct dm_table *t) + { +- dm_destroy_keyslot_manager(t->ksm); +- t->ksm = NULL; ++ dm_destroy_crypto_profile(t->crypto_profile); ++ t->crypto_profile = NULL; + } + + /* +- * Constructs and initializes t->ksm with a keyslot manager that +- * represents the common set of crypto capabilities of the devices +- * described by the dm_table. However, if the constructed keyslot +- * manager does not support a superset of the crypto capabilities +- * supported by the current keyslot manager of the mapped_device, +- * it returns an error instead, since we don't support restricting +- * crypto capabilities on table changes. Finally, if the constructed +- * keyslot manager doesn't actually support any crypto modes at all, +- * it just returns NULL. ++ * Constructs and initializes t->crypto_profile with a crypto profile that ++ * represents the common set of crypto capabilities of the devices described by ++ * the dm_table. However, if the constructed crypto profile doesn't support all ++ * crypto capabilities that are supported by the current mapped_device, it ++ * returns an error instead, since we don't support removing crypto capabilities ++ * on table changes. Finally, if the constructed crypto profile is "empty" (has ++ * no crypto capabilities at all), it just sets t->crypto_profile to NULL. + */ +-static int dm_table_construct_keyslot_manager(struct dm_table *t) ++static int dm_table_construct_crypto_profile(struct dm_table *t) + { +- struct dm_keyslot_manager *dksm; +- struct blk_keyslot_manager *ksm; ++ struct dm_crypto_profile *dmcp; ++ struct blk_crypto_profile *profile; + struct dm_target *ti; + unsigned int i; +- bool ksm_is_empty = true; ++ bool empty_profile = true; + +- dksm = kmalloc(sizeof(*dksm), GFP_KERNEL); +- if (!dksm) ++ dmcp = kmalloc(sizeof(*dmcp), GFP_KERNEL); ++ if (!dmcp) + return -ENOMEM; +- dksm->md = t->md; ++ dmcp->md = t->md; + +- ksm = &dksm->ksm; +- blk_ksm_init_passthrough(ksm); +- ksm->ksm_ll_ops = dm_ksm_ll_ops; +- ksm->max_dun_bytes_supported = UINT_MAX; +- memset(ksm->crypto_modes_supported, 0xFF, +- sizeof(ksm->crypto_modes_supported)); ++ profile = &dmcp->profile; ++ blk_crypto_profile_init(profile, 0); ++ profile->ll_ops.keyslot_evict = dm_keyslot_evict; ++ profile->max_dun_bytes_supported = UINT_MAX; ++ memset(profile->modes_supported, 0xFF, ++ sizeof(profile->modes_supported)); + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (!dm_target_passes_crypto(ti->type)) { +- blk_ksm_intersect_modes(ksm, NULL); ++ blk_crypto_intersect_capabilities(profile, NULL); + break; + } + if (!ti->type->iterate_devices) + continue; +- ti->type->iterate_devices(ti, device_intersect_crypto_modes, +- ksm); ++ ti->type->iterate_devices(ti, ++ device_intersect_crypto_capabilities, ++ profile); + } + +- if (t->md->queue && !blk_ksm_is_superset(ksm, t->md->queue->ksm)) { ++ if (t->md->queue && ++ !blk_crypto_has_capabilities(profile, ++ t->md->queue->crypto_profile)) { + DMWARN("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!"); +- dm_destroy_keyslot_manager(ksm); ++ dm_destroy_crypto_profile(profile); + return -EINVAL; + } + + /* +- * If the new KSM doesn't actually support any crypto modes, we may as +- * well represent it with a NULL ksm. ++ * If the new profile doesn't actually support any crypto capabilities, ++ * we may as well represent it with a NULL profile. 
+ */ +- ksm_is_empty = true; +- for (i = 0; i < ARRAY_SIZE(ksm->crypto_modes_supported); i++) { +- if (ksm->crypto_modes_supported[i]) { +- ksm_is_empty = false; ++ for (i = 0; i < ARRAY_SIZE(profile->modes_supported); i++) { ++ if (profile->modes_supported[i]) { ++ empty_profile = false; + break; + } + } + +- if (ksm_is_empty) { +- dm_destroy_keyslot_manager(ksm); +- ksm = NULL; ++ if (empty_profile) { ++ dm_destroy_crypto_profile(profile); ++ profile = NULL; + } + + /* +- * t->ksm is only set temporarily while the table is being set +- * up, and it gets set to NULL after the capabilities have +- * been transferred to the request_queue. ++ * t->crypto_profile is only set temporarily while the table is being ++ * set up, and it gets set to NULL after the profile has been ++ * transferred to the request_queue. + */ +- t->ksm = ksm; ++ t->crypto_profile = profile; + + return 0; + } + +-static void dm_update_keyslot_manager(struct request_queue *q, +- struct dm_table *t) ++static void dm_update_crypto_profile(struct request_queue *q, ++ struct dm_table *t) + { +- if (!t->ksm) ++ if (!t->crypto_profile) + return; + +- /* Make the ksm less restrictive */ +- if (!q->ksm) { +- blk_ksm_register(t->ksm, q); ++ /* Make the crypto profile less restrictive. */ ++ if (!q->crypto_profile) { ++ blk_crypto_register(t->crypto_profile, q); + } else { +- blk_ksm_update_capabilities(q->ksm, t->ksm); +- dm_destroy_keyslot_manager(t->ksm); ++ blk_crypto_update_capabilities(q->crypto_profile, ++ t->crypto_profile); ++ dm_destroy_crypto_profile(t->crypto_profile); + } +- t->ksm = NULL; ++ t->crypto_profile = NULL; + } + + #else /* CONFIG_BLK_INLINE_ENCRYPTION */ + +-static int dm_table_construct_keyslot_manager(struct dm_table *t) ++static int dm_table_construct_crypto_profile(struct dm_table *t) + { + return 0; + } + +-void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm) ++void dm_destroy_crypto_profile(struct blk_crypto_profile *profile) + { + } + +-static void dm_table_destroy_keyslot_manager(struct dm_table *t) ++static void dm_table_destroy_crypto_profile(struct dm_table *t) + { + } + +-static void dm_update_keyslot_manager(struct request_queue *q, +- struct dm_table *t) ++static void dm_update_crypto_profile(struct request_queue *q, ++ struct dm_table *t) + { + } + +@@ -1415,9 +1411,9 @@ int dm_table_complete(struct dm_table *t) + return r; + } + +- r = dm_table_construct_keyslot_manager(t); ++ r = dm_table_construct_crypto_profile(t); + if (r) { +- DMERR("could not construct keyslot manager."); ++ DMERR("could not construct crypto profile."); + return r; + } + +@@ -2071,7 +2067,7 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, + return r; + } + +- dm_update_keyslot_manager(q, t); ++ dm_update_crypto_profile(q, t); + disk_update_readahead(t->md->disk); + + return 0; +diff --git a/drivers/md/dm.c b/drivers/md/dm.c +index 4184fd8ccb08..8b91f4f0e053 100644 +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -1663,14 +1663,14 @@ static const struct dax_operations dm_dax_ops; + static void dm_wq_work(struct work_struct *work); + + #ifdef CONFIG_BLK_INLINE_ENCRYPTION +-static void dm_queue_destroy_keyslot_manager(struct request_queue *q) ++static void dm_queue_destroy_crypto_profile(struct request_queue *q) + { +- dm_destroy_keyslot_manager(q->ksm); ++ dm_destroy_crypto_profile(q->crypto_profile); + } + + #else /* CONFIG_BLK_INLINE_ENCRYPTION */ + +-static inline void dm_queue_destroy_keyslot_manager(struct request_queue *q) ++static inline void 
dm_queue_destroy_crypto_profile(struct request_queue *q) + { + } + #endif /* !CONFIG_BLK_INLINE_ENCRYPTION */ +@@ -1696,7 +1696,7 @@ static void cleanup_mapped_device(struct mapped_device *md) + dm_sysfs_exit(md); + del_gendisk(md->disk); + } +- dm_queue_destroy_keyslot_manager(md->queue); ++ dm_queue_destroy_crypto_profile(md->queue); + blk_cleanup_disk(md->disk); + } + +diff --git a/drivers/mmc/core/crypto.c b/drivers/mmc/core/crypto.c +index 67557808cada..fec4fbf16a5b 100644 +--- a/drivers/mmc/core/crypto.c ++++ b/drivers/mmc/core/crypto.c +@@ -16,13 +16,13 @@ void mmc_crypto_set_initial_state(struct mmc_host *host) + { + /* Reset might clear all keys, so reprogram all the keys. */ + if (host->caps2 & MMC_CAP2_CRYPTO) +- blk_ksm_reprogram_all_keys(&host->ksm); ++ blk_crypto_reprogram_all_keys(&host->crypto_profile); + } + + void mmc_crypto_setup_queue(struct request_queue *q, struct mmc_host *host) + { + if (host->caps2 & MMC_CAP2_CRYPTO) +- blk_ksm_register(&host->ksm, q); ++ blk_crypto_register(&host->crypto_profile, q); + } + EXPORT_SYMBOL_GPL(mmc_crypto_setup_queue); + +@@ -30,12 +30,15 @@ void mmc_crypto_prepare_req(struct mmc_queue_req *mqrq) + { + struct request *req = mmc_queue_req_to_req(mqrq); + struct mmc_request *mrq = &mqrq->brq.mrq; ++ struct blk_crypto_keyslot *keyslot; + + if (!req->crypt_ctx) + return; + + mrq->crypto_ctx = req->crypt_ctx; +- if (req->crypt_keyslot) +- mrq->crypto_key_slot = blk_ksm_get_slot_idx(req->crypt_keyslot); ++ ++ keyslot = req->crypt_keyslot; ++ if (keyslot) ++ mrq->crypto_key_slot = blk_crypto_keyslot_index(keyslot); + } + EXPORT_SYMBOL_GPL(mmc_crypto_prepare_req); +diff --git a/drivers/mmc/host/cqhci-crypto.c b/drivers/mmc/host/cqhci-crypto.c +index 628bbfaf8312..d5f4b6972f63 100644 +--- a/drivers/mmc/host/cqhci-crypto.c ++++ b/drivers/mmc/host/cqhci-crypto.c +@@ -23,9 +23,10 @@ static const struct cqhci_crypto_alg_entry { + }; + + static inline struct cqhci_host * +-cqhci_host_from_ksm(struct blk_keyslot_manager *ksm) ++cqhci_host_from_crypto_profile(struct blk_crypto_profile *profile) + { +- struct mmc_host *mmc = container_of(ksm, struct mmc_host, ksm); ++ struct mmc_host *mmc = ++ container_of(profile, struct mmc_host, crypto_profile); + + return mmc->cqe_private; + } +@@ -57,12 +58,12 @@ static int cqhci_crypto_program_key(struct cqhci_host *cq_host, + return 0; + } + +-static int cqhci_crypto_keyslot_program(struct blk_keyslot_manager *ksm, ++static int cqhci_crypto_keyslot_program(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key, + unsigned int slot) + + { +- struct cqhci_host *cq_host = cqhci_host_from_ksm(ksm); ++ struct cqhci_host *cq_host = cqhci_host_from_crypto_profile(profile); + const union cqhci_crypto_cap_entry *ccap_array = + cq_host->crypto_cap_array; + const struct cqhci_crypto_alg_entry *alg = +@@ -115,11 +116,11 @@ static int cqhci_crypto_clear_keyslot(struct cqhci_host *cq_host, int slot) + return cqhci_crypto_program_key(cq_host, &cfg, slot); + } + +-static int cqhci_crypto_keyslot_evict(struct blk_keyslot_manager *ksm, ++static int cqhci_crypto_keyslot_evict(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key, + unsigned int slot) + { +- struct cqhci_host *cq_host = cqhci_host_from_ksm(ksm); ++ struct cqhci_host *cq_host = cqhci_host_from_crypto_profile(profile); + + return cqhci_crypto_clear_keyslot(cq_host, slot); + } +@@ -132,7 +133,7 @@ static int cqhci_crypto_keyslot_evict(struct blk_keyslot_manager *ksm, + * "enabled" when these are called, i.e. 
CQHCI_ENABLE might not be set in the + * CQHCI_CFG register. But the hardware allows that. + */ +-static const struct blk_ksm_ll_ops cqhci_ksm_ops = { ++static const struct blk_crypto_ll_ops cqhci_crypto_ops = { + .keyslot_program = cqhci_crypto_keyslot_program, + .keyslot_evict = cqhci_crypto_keyslot_evict, + }; +@@ -157,8 +158,8 @@ cqhci_find_blk_crypto_mode(union cqhci_crypto_cap_entry cap) + * + * If the driver previously set MMC_CAP2_CRYPTO and the CQE declares + * CQHCI_CAP_CS, initialize the crypto support. This involves reading the +- * crypto capability registers, initializing the keyslot manager, clearing all +- * keyslots, and enabling 128-bit task descriptors. ++ * crypto capability registers, initializing the blk_crypto_profile, clearing ++ * all keyslots, and enabling 128-bit task descriptors. + * + * Return: 0 if crypto was initialized or isn't supported; whether + * MMC_CAP2_CRYPTO remains set indicates which one of those cases it is. +@@ -168,7 +169,7 @@ int cqhci_crypto_init(struct cqhci_host *cq_host) + { + struct mmc_host *mmc = cq_host->mmc; + struct device *dev = mmc_dev(mmc); +- struct blk_keyslot_manager *ksm = &mmc->ksm; ++ struct blk_crypto_profile *profile = &mmc->crypto_profile; + unsigned int num_keyslots; + unsigned int cap_idx; + enum blk_crypto_mode_num blk_mode_num; +@@ -199,15 +200,15 @@ int cqhci_crypto_init(struct cqhci_host *cq_host) + */ + num_keyslots = cq_host->crypto_capabilities.config_count + 1; + +- err = devm_blk_ksm_init(dev, ksm, num_keyslots); ++ err = devm_blk_crypto_profile_init(dev, profile, num_keyslots); + if (err) + goto out; + +- ksm->ksm_ll_ops = cqhci_ksm_ops; +- ksm->dev = dev; ++ profile->ll_ops = cqhci_crypto_ops; ++ profile->dev = dev; + + /* Unfortunately, CQHCI crypto only supports 32 DUN bits. 
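(Illustrative note, not part of the patch: 32 DUN bits is 4 bytes, so per the blk_crypto_profile documentation the supported DUN range here is 0 through (1 << 32) - 1, which is exactly what the max_dun_bytes_supported = 4 assignment below encodes.)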
*/ +- ksm->max_dun_bytes_supported = 4; ++ profile->max_dun_bytes_supported = 4; + + /* + * Cache all the crypto capabilities and advertise the supported crypto +@@ -223,7 +224,7 @@ int cqhci_crypto_init(struct cqhci_host *cq_host) + cq_host->crypto_cap_array[cap_idx]); + if (blk_mode_num == BLK_ENCRYPTION_MODE_INVALID) + continue; +- ksm->crypto_modes_supported[blk_mode_num] |= ++ profile->modes_supported[blk_mode_num] |= + cq_host->crypto_cap_array[cap_idx].sdus_mask * 512; + } + +diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c +index d70cdcd35e43..67402baf6fae 100644 +--- a/drivers/scsi/ufs/ufshcd-crypto.c ++++ b/drivers/scsi/ufs/ufshcd-crypto.c +@@ -48,11 +48,12 @@ static int ufshcd_program_key(struct ufs_hba *hba, + return err; + } + +-static int ufshcd_crypto_keyslot_program(struct blk_keyslot_manager *ksm, ++static int ufshcd_crypto_keyslot_program(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key, + unsigned int slot) + { +- struct ufs_hba *hba = container_of(ksm, struct ufs_hba, ksm); ++ struct ufs_hba *hba = ++ container_of(profile, struct ufs_hba, crypto_profile); + const union ufs_crypto_cap_entry *ccap_array = hba->crypto_cap_array; + const struct ufs_crypto_alg_entry *alg = + &ufs_crypto_algs[key->crypto_cfg.crypto_mode]; +@@ -105,11 +106,12 @@ static int ufshcd_clear_keyslot(struct ufs_hba *hba, int slot) + return ufshcd_program_key(hba, &cfg, slot); + } + +-static int ufshcd_crypto_keyslot_evict(struct blk_keyslot_manager *ksm, ++static int ufshcd_crypto_keyslot_evict(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key, + unsigned int slot) + { +- struct ufs_hba *hba = container_of(ksm, struct ufs_hba, ksm); ++ struct ufs_hba *hba = ++ container_of(profile, struct ufs_hba, crypto_profile); + + return ufshcd_clear_keyslot(hba, slot); + } +@@ -120,11 +122,11 @@ bool ufshcd_crypto_enable(struct ufs_hba *hba) + return false; + + /* Reset might clear all keys, so reprogram all the keys. 
*/ +- blk_ksm_reprogram_all_keys(&hba->ksm); ++ blk_crypto_reprogram_all_keys(&hba->crypto_profile); + return true; + } + +-static const struct blk_ksm_ll_ops ufshcd_ksm_ops = { ++static const struct blk_crypto_ll_ops ufshcd_crypto_ops = { + .keyslot_program = ufshcd_crypto_keyslot_program, + .keyslot_evict = ufshcd_crypto_keyslot_evict, + }; +@@ -179,15 +181,16 @@ int ufshcd_hba_init_crypto_capabilities(struct ufs_hba *hba) + } + + /* The actual number of configurations supported is (CFGC+1) */ +- err = devm_blk_ksm_init(hba->dev, &hba->ksm, +- hba->crypto_capabilities.config_count + 1); ++ err = devm_blk_crypto_profile_init( ++ hba->dev, &hba->crypto_profile, ++ hba->crypto_capabilities.config_count + 1); + if (err) + goto out; + +- hba->ksm.ksm_ll_ops = ufshcd_ksm_ops; ++ hba->crypto_profile.ll_ops = ufshcd_crypto_ops; + /* UFS only supports 8 bytes for any DUN */ +- hba->ksm.max_dun_bytes_supported = 8; +- hba->ksm.dev = hba->dev; ++ hba->crypto_profile.max_dun_bytes_supported = 8; ++ hba->crypto_profile.dev = hba->dev; + + /* + * Cache all the UFS crypto capabilities and advertise the supported +@@ -202,7 +205,7 @@ int ufshcd_hba_init_crypto_capabilities(struct ufs_hba *hba) + blk_mode_num = ufshcd_find_blk_crypto_mode( + hba->crypto_cap_array[cap_idx]); + if (blk_mode_num != BLK_ENCRYPTION_MODE_INVALID) +- hba->ksm.crypto_modes_supported[blk_mode_num] |= ++ hba->crypto_profile.modes_supported[blk_mode_num] |= + hba->crypto_cap_array[cap_idx].sdus_mask * 512; + } + +@@ -230,9 +233,8 @@ void ufshcd_init_crypto(struct ufs_hba *hba) + ufshcd_clear_keyslot(hba, slot); + } + +-void ufshcd_crypto_setup_rq_keyslot_manager(struct ufs_hba *hba, +- struct request_queue *q) ++void ufshcd_crypto_register(struct ufs_hba *hba, struct request_queue *q) + { + if (hba->caps & UFSHCD_CAP_CRYPTO) +- blk_ksm_register(&hba->ksm, q); ++ blk_crypto_register(&hba->crypto_profile, q); + } +diff --git a/drivers/scsi/ufs/ufshcd-crypto.h b/drivers/scsi/ufs/ufshcd-crypto.h +index 78a58e788dff..e18c01276873 100644 +--- a/drivers/scsi/ufs/ufshcd-crypto.h ++++ b/drivers/scsi/ufs/ufshcd-crypto.h +@@ -18,7 +18,7 @@ static inline void ufshcd_prepare_lrbp_crypto(struct request *rq, + return; + } + +- lrbp->crypto_key_slot = blk_ksm_get_slot_idx(rq->crypt_keyslot); ++ lrbp->crypto_key_slot = blk_crypto_keyslot_index(rq->crypt_keyslot); + lrbp->data_unit_num = rq->crypt_ctx->bc_dun[0]; + } + +@@ -40,8 +40,7 @@ int ufshcd_hba_init_crypto_capabilities(struct ufs_hba *hba); + + void ufshcd_init_crypto(struct ufs_hba *hba); + +-void ufshcd_crypto_setup_rq_keyslot_manager(struct ufs_hba *hba, +- struct request_queue *q); ++void ufshcd_crypto_register(struct ufs_hba *hba, struct request_queue *q); + + #else /* CONFIG_SCSI_UFS_CRYPTO */ + +@@ -64,8 +63,8 @@ static inline int ufshcd_hba_init_crypto_capabilities(struct ufs_hba *hba) + + static inline void ufshcd_init_crypto(struct ufs_hba *hba) { } + +-static inline void ufshcd_crypto_setup_rq_keyslot_manager(struct ufs_hba *hba, +- struct request_queue *q) { } ++static inline void ufshcd_crypto_register(struct ufs_hba *hba, ++ struct request_queue *q) { } + + #endif /* CONFIG_SCSI_UFS_CRYPTO */ + +diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c +index 95be7ecdfe10..bf81da2ecf98 100644 +--- a/drivers/scsi/ufs/ufshcd.c ++++ b/drivers/scsi/ufs/ufshcd.c +@@ -4986,7 +4986,7 @@ static int ufshcd_slave_configure(struct scsi_device *sdev) + else if (ufshcd_is_rpm_autosuspend_allowed(hba)) + sdev->rpm_autosuspend = 1; + +- ufshcd_crypto_setup_rq_keyslot_manager(hba, q); 
++ ufshcd_crypto_register(hba, q); + + return 0; + } +diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h +index 885fcf2e5922..62bdc412d38a 100644 +--- a/drivers/scsi/ufs/ufshcd.h ++++ b/drivers/scsi/ufs/ufshcd.h +@@ -766,7 +766,7 @@ struct ufs_hba_monitor { + * @crypto_capabilities: Content of crypto capabilities register (0x100) + * @crypto_cap_array: Array of crypto capabilities + * @crypto_cfg_register: Start of the crypto cfg array +- * @ksm: the keyslot manager tied to this hba ++ * @crypto_profile: the crypto profile of this hba (if applicable) + */ + struct ufs_hba { + void __iomem *mmio_base; +@@ -911,7 +911,7 @@ struct ufs_hba { + union ufs_crypto_capabilities crypto_capabilities; + union ufs_crypto_cap_entry *crypto_cap_array; + u32 crypto_cfg_register; +- struct blk_keyslot_manager ksm; ++ struct blk_crypto_profile crypto_profile; + #endif + #ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_root; +diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h +index a27605e2f826..bbab65bd5428 100644 +--- a/include/linux/blk-crypto-profile.h ++++ b/include/linux/blk-crypto-profile.h +@@ -3,67 +3,113 @@ + * Copyright 2019 Google LLC + */ + +-#ifndef __LINUX_KEYSLOT_MANAGER_H +-#define __LINUX_KEYSLOT_MANAGER_H ++#ifndef __LINUX_BLK_CRYPTO_PROFILE_H ++#define __LINUX_BLK_CRYPTO_PROFILE_H + + #include + #include + +-struct blk_keyslot_manager; ++struct blk_crypto_profile; + + /** +- * struct blk_ksm_ll_ops - functions to manage keyslots in hardware +- * @keyslot_program: Program the specified key into the specified slot in the +- * inline encryption hardware. +- * @keyslot_evict: Evict key from the specified keyslot in the hardware. +- * The key is provided so that e.g. dm layers can evict +- * keys from the devices that they map over. +- * Returns 0 on success, -errno otherwise. ++ * struct blk_crypto_ll_ops - functions to control inline encryption hardware + * +- * This structure should be provided by storage device drivers when they set up +- * a keyslot manager - this structure holds the function ptrs that the keyslot +- * manager will use to manipulate keyslots in the hardware. ++ * Low-level operations for controlling inline encryption hardware. This ++ * interface must be implemented by storage drivers that support inline ++ * encryption. All functions may sleep, are serialized by profile->lock, and ++ * are never called while profile->dev (if set) is runtime-suspended. + */ +-struct blk_ksm_ll_ops { +- int (*keyslot_program)(struct blk_keyslot_manager *ksm, ++struct blk_crypto_ll_ops { ++ ++ /** ++ * @keyslot_program: Program a key into the inline encryption hardware. ++ * ++ * Program @key into the specified @slot in the inline encryption ++ * hardware, overwriting any key that the keyslot may already contain. ++ * The keyslot is guaranteed to not be in-use by any I/O. ++ * ++ * This is required if the device has keyslots. Otherwise (i.e. if the ++ * device is a layered device, or if the device is real hardware that ++ * simply doesn't have the concept of keyslots) it is never called. ++ * ++ * Must return 0 on success, or -errno on failure. ++ */ ++ int (*keyslot_program)(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key, + unsigned int slot); +- int (*keyslot_evict)(struct blk_keyslot_manager *ksm, ++ ++ /** ++ * @keyslot_evict: Evict a key from the inline encryption hardware. ++ * ++ * If the device has keyslots, this function must evict the key from the ++ * specified @slot. 
The slot will contain @key, but there should be no ++ * need for the @key argument to be used as @slot should be sufficient. ++ * The keyslot is guaranteed to not be in-use by any I/O. ++ * ++ * If the device doesn't have keyslots itself, this function must evict ++ * @key from any underlying devices. @slot won't be valid in this case. ++ * ++ * If there are no keyslots and no underlying devices, this function ++ * isn't required. ++ * ++ * Must return 0 on success, or -errno on failure. ++ */ ++ int (*keyslot_evict)(struct blk_crypto_profile *profile, + const struct blk_crypto_key *key, + unsigned int slot); + }; + +-struct blk_keyslot_manager { +- /* +- * The struct blk_ksm_ll_ops that this keyslot manager will use +- * to perform operations like programming and evicting keys on the +- * device ++/** ++ * struct blk_crypto_profile - inline encryption profile for a device ++ * ++ * This struct contains a storage device's inline encryption capabilities (e.g. ++ * the supported crypto algorithms), driver-provided functions to control the ++ * inline encryption hardware (e.g. programming and evicting keys), and optional ++ * device-independent keyslot management data. ++ */ ++struct blk_crypto_profile { ++ ++ /* public: Drivers must initialize the following fields. */ ++ ++ /** ++ * @ll_ops: Driver-provided functions to control the inline encryption ++ * hardware, e.g. program and evict keys. + */ +- struct blk_ksm_ll_ops ksm_ll_ops; ++ struct blk_crypto_ll_ops ll_ops; + +- /* +- * The maximum number of bytes supported for specifying the data unit +- * number. ++ /** ++ * @max_dun_bytes_supported: The maximum number of bytes supported for ++ * specifying the data unit number (DUN). Specifically, the range of ++ * supported DUNs is 0 through (1 << (8 * max_dun_bytes_supported)) - 1. + */ + unsigned int max_dun_bytes_supported; + +- /* +- * Array of size BLK_ENCRYPTION_MODE_MAX of bitmasks that represents +- * whether a crypto mode and data unit size are supported. The i'th +- * bit of crypto_mode_supported[crypto_mode] is set iff a data unit +- * size of (1 << i) is supported. We only support data unit sizes +- * that are powers of 2. ++ /** ++ * @modes_supported: Array of bitmasks that specifies whether each ++ * combination of crypto mode and data unit size is supported. ++ * Specifically, the i'th bit of modes_supported[crypto_mode] is set if ++ * crypto_mode can be used with a data unit size of (1 << i). Note that ++ * only data unit sizes that are powers of 2 can be supported. + */ +- unsigned int crypto_modes_supported[BLK_ENCRYPTION_MODE_MAX]; ++ unsigned int modes_supported[BLK_ENCRYPTION_MODE_MAX]; + +- /* Device for runtime power management (NULL if none) */ ++ /** ++ * @dev: An optional device for runtime power management. If the driver ++ * provides this device, it will be runtime-resumed before any function ++ * in @ll_ops is called and will remain resumed during the call. ++ */ + struct device *dev; + +- /* Here onwards are *private* fields for internal keyslot manager use */ ++ /* private: The following fields shouldn't be accessed by drivers. */ + ++ /* Number of keyslots, or 0 if not applicable */ + unsigned int num_slots; + +- /* Protects programming and evicting keys from the device */ ++ /* ++ * Serializes all calls to functions in @ll_ops as well as all changes ++ * to @slot_hashtable. This can also be taken in read mode to look up ++ * keyslots while ensuring that they can't be changed concurrently. 
++ */ + struct rw_semaphore lock; + + /* List of idle slots, with least recently used slot at front */ +@@ -80,41 +126,41 @@ struct blk_keyslot_manager { + unsigned int log_slot_ht_size; + + /* Per-keyslot data */ +- struct blk_ksm_keyslot *slots; ++ struct blk_crypto_keyslot *slots; + }; + +-int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots); +- +-int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm, +- unsigned int num_slots); ++int blk_crypto_profile_init(struct blk_crypto_profile *profile, ++ unsigned int num_slots); + +-blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key, +- struct blk_ksm_keyslot **slot_ptr); ++int devm_blk_crypto_profile_init(struct device *dev, ++ struct blk_crypto_profile *profile, ++ unsigned int num_slots); + +-unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot); ++unsigned int blk_crypto_keyslot_index(struct blk_crypto_keyslot *slot); + +-void blk_ksm_put_slot(struct blk_ksm_keyslot *slot); ++blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile, ++ const struct blk_crypto_key *key, ++ struct blk_crypto_keyslot **slot_ptr); + +-bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_config *cfg); ++void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot); + +-int blk_ksm_evict_key(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key); ++bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile, ++ const struct blk_crypto_config *cfg); + +-void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm); ++int __blk_crypto_evict_key(struct blk_crypto_profile *profile, ++ const struct blk_crypto_key *key); + +-void blk_ksm_destroy(struct blk_keyslot_manager *ksm); ++void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile); + +-void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent, +- const struct blk_keyslot_manager *child); ++void blk_crypto_profile_destroy(struct blk_crypto_profile *profile); + +-void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm); ++void blk_crypto_intersect_capabilities(struct blk_crypto_profile *parent, ++ const struct blk_crypto_profile *child); + +-bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset, +- struct blk_keyslot_manager *ksm_subset); ++bool blk_crypto_has_capabilities(const struct blk_crypto_profile *target, ++ const struct blk_crypto_profile *reference); + +-void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm, +- struct blk_keyslot_manager *reference_ksm); ++void blk_crypto_update_capabilities(struct blk_crypto_profile *dst, ++ const struct blk_crypto_profile *src); + +-#endif /* __LINUX_KEYSLOT_MANAGER_H */ ++#endif /* __LINUX_BLK_CRYPTO_PROFILE_H */ +diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h +index e13780236550..b4039fdf1b04 100644 +--- a/include/linux/blk-mq.h ++++ b/include/linux/blk-mq.h +@@ -133,7 +133,7 @@ struct request { + + #ifdef CONFIG_BLK_INLINE_ENCRYPTION + struct bio_crypt_ctx *crypt_ctx; +- struct blk_ksm_keyslot *crypt_keyslot; ++ struct blk_crypto_keyslot *crypt_keyslot; + #endif + + unsigned short write_hint; +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index c7b1e9355123..f72ccb2829db 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -30,7 +30,7 @@ struct pr_ops; + struct rq_qos; + struct blk_queue_stats; + struct blk_stat_callback; +-struct blk_keyslot_manager; ++struct blk_crypto_profile; + 
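For illustration only, and not part of the patch: a driver adopting the renamed API above would wire it up roughly as in the following sketch, modeled on the cqhci and ufshcd conversions earlier in this patch. All mydev_* identifiers and MYDEV_NUM_KEYSLOTS are hypothetical placeholders.

	static int mydev_keyslot_program(struct blk_crypto_profile *profile,
					 const struct blk_crypto_key *key,
					 unsigned int slot)
	{
		struct mydev *mydev =
			container_of(profile, struct mydev, crypto_profile);

		return mydev_write_hw_key(mydev, key, slot); /* hypothetical */
	}

	static int mydev_keyslot_evict(struct blk_crypto_profile *profile,
				       const struct blk_crypto_key *key,
				       unsigned int slot)
	{
		struct mydev *mydev =
			container_of(profile, struct mydev, crypto_profile);

		return mydev_clear_hw_key(mydev, slot); /* hypothetical */
	}

	static const struct blk_crypto_ll_ops mydev_crypto_ops = {
		.keyslot_program	= mydev_keyslot_program,
		.keyslot_evict		= mydev_keyslot_evict,
	};

	static int mydev_init_crypto(struct mydev *mydev, struct request_queue *q)
	{
		struct blk_crypto_profile *profile = &mydev->crypto_profile;
		int err;

		err = devm_blk_crypto_profile_init(mydev->dev, profile,
						   MYDEV_NUM_KEYSLOTS);
		if (err)
			return err;

		profile->ll_ops = mydev_crypto_ops;
		profile->dev = mydev->dev;
		profile->max_dun_bytes_supported = 8;
		/* AES-256-XTS with 4096-byte data units only (bit 12 of the mask) */
		profile->modes_supported[BLK_ENCRYPTION_MODE_AES_256_XTS] |= 4096;

		return blk_crypto_register(profile, q) ? 0 : -EINVAL;
	}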
+ /* Must be consistent with blk_mq_poll_stats_bkt() */ + #define BLK_MQ_POLL_STATS_BKTS 16 +@@ -224,8 +224,7 @@ struct request_queue { + unsigned int dma_alignment; + + #ifdef CONFIG_BLK_INLINE_ENCRYPTION +- /* Inline crypto capabilities */ +- struct blk_keyslot_manager *ksm; ++ struct blk_crypto_profile *crypto_profile; + #endif + + unsigned int rq_timeout; +@@ -1142,19 +1141,20 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned lo + + #ifdef CONFIG_BLK_INLINE_ENCRYPTION + +-bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q); ++bool blk_crypto_register(struct blk_crypto_profile *profile, ++ struct request_queue *q); + +-void blk_ksm_unregister(struct request_queue *q); ++void blk_crypto_unregister(struct request_queue *q); + + #else /* CONFIG_BLK_INLINE_ENCRYPTION */ + +-static inline bool blk_ksm_register(struct blk_keyslot_manager *ksm, +- struct request_queue *q) ++static inline bool blk_crypto_register(struct blk_crypto_profile *profile, ++ struct request_queue *q) + { + return true; + } + +-static inline void blk_ksm_unregister(struct request_queue *q) { } ++static inline void blk_crypto_unregister(struct request_queue *q) { } + + #endif /* CONFIG_BLK_INLINE_ENCRYPTION */ + +diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h +index 114553b487ef..a7df155ea49b 100644 +--- a/include/linux/device-mapper.h ++++ b/include/linux/device-mapper.h +@@ -576,9 +576,9 @@ struct dm_table *dm_swap_table(struct mapped_device *md, + struct dm_table *t); + + /* +- * Table keyslot manager functions ++ * Table blk_crypto_profile functions + */ +-void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm); ++void dm_destroy_crypto_profile(struct blk_crypto_profile *profile); + + /*----------------------------------------------------------------- + * Macros. +diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h +index 725b1de41767..52eae8c45b8d 100644 +--- a/include/linux/mmc/host.h ++++ b/include/linux/mmc/host.h +@@ -492,7 +492,7 @@ struct mmc_host { + + /* Inline encryption support */ + #ifdef CONFIG_MMC_CRYPTO +- struct blk_keyslot_manager ksm; ++ struct blk_crypto_profile crypto_profile; + #endif + + /* Host Software Queue support */ +-- +2.35.3 + diff --git a/patches.suse/blk-crypto-rename-keyslot-manager-files-to-blk-crypt.patch b/patches.suse/blk-crypto-rename-keyslot-manager-files-to-blk-crypt.patch new file mode 100644 index 0000000..640e0fc --- /dev/null +++ b/patches.suse/blk-crypto-rename-keyslot-manager-files-to-blk-crypt.patch @@ -0,0 +1,1571 @@ +From: Eric Biggers +Date: Mon, 18 Oct 2021 11:04:51 -0700 +Subject: [PATCH] blk-crypto: rename keyslot-manager files to + blk-crypto-profile +Git-commit: 1e8d44bddf57f6d878e083f281a34d5c88feb7db +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +In preparation for renaming struct blk_keyslot_manager to struct +blk_crypto_profile, rename the keyslot-manager.h and keyslot-manager.c +source files. Renaming these files separately before making a lot of +changes to their contents makes it easier for git to understand that +they were renamed. + +Acked-by: Ulf Hansson # For MMC +Reviewed-by: Christoph Hellwig +Reviewed-by: Mike Snitzer +Reviewed-by: Martin K. 
Petersen +Signed-off-by: Eric Biggers +Link: https://lore.kernel.org/r/20211018180453.40441-3-ebiggers@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/Makefile | 2 +- + block/blk-crypto-fallback.c | 2 +- + block/blk-crypto-profile.c | 579 +++++++++++++++++++++++++++++ + block/blk-crypto.c | 2 +- + block/keyslot-manager.c | 579 ----------------------------- + drivers/md/dm-core.h | 2 +- + drivers/md/dm.c | 2 +- + drivers/mmc/host/cqhci-crypto.c | 2 +- + drivers/scsi/ufs/ufshcd.h | 2 +- + include/linux/blk-crypto-profile.h | 120 ++++++ + include/linux/keyslot-manager.h | 120 ------ + include/linux/mmc/host.h | 2 +- + 12 files changed, 707 insertions(+), 707 deletions(-) + create mode 100644 block/blk-crypto-profile.c + delete mode 100644 block/keyslot-manager.c + create mode 100644 include/linux/blk-crypto-profile.h + delete mode 100644 include/linux/keyslot-manager.h + +diff --git a/block/Makefile b/block/Makefile +index 74df168729ec..602f7f47b7b6 100644 +--- a/block/Makefile ++++ b/block/Makefile +@@ -36,6 +36,6 @@ obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o + obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o + obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o + obj-$(CONFIG_BLK_PM) += blk-pm.o +-obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += keyslot-manager.o blk-crypto.o ++obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o + obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o + obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED) += holder.o +diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c +index 1bcc1a151424..08bfea292c75 100644 +--- a/block/blk-crypto-fallback.c ++++ b/block/blk-crypto-fallback.c +@@ -12,9 +12,9 @@ + #include + #include + #include ++#include + #include + #include +-#include + #include + #include + #include +diff --git a/block/blk-crypto-profile.c b/block/blk-crypto-profile.c +new file mode 100644 +index 000000000000..1a235fa3c3e8 +--- /dev/null ++++ b/block/blk-crypto-profile.c +@@ -0,0 +1,579 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright 2019 Google LLC ++ */ ++ ++/** ++ * DOC: The Keyslot Manager ++ * ++ * Many devices with inline encryption support have a limited number of "slots" ++ * into which encryption contexts may be programmed, and requests can be tagged ++ * with a slot number to specify the key to use for en/decryption. ++ * ++ * As the number of slots is limited, and programming keys is expensive on ++ * many inline encryption hardware, we don't want to program the same key into ++ * multiple slots - if multiple requests are using the same key, we want to ++ * program just one slot with that key and use that slot for all requests. ++ * ++ * The keyslot manager manages these keyslots appropriately, and also acts as ++ * an abstraction between the inline encryption hardware and the upper layers. ++ * ++ * Lower layer devices will set up a keyslot manager in their request queue ++ * and tell it how to perform device specific operations like programming/ ++ * evicting keys from keyslots. ++ * ++ * Upper layers will call blk_ksm_get_slot_for_key() to program a ++ * key into some slot in the inline encryption hardware. 
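By way of illustration, and not part of the patch: the upper-layer pattern just described boils down to the following sketch, assuming a keyslot manager ksm and a prepared struct blk_crypto_key *key are at hand.

	struct blk_ksm_keyslot *slot;
	blk_status_t status;

	status = blk_ksm_get_slot_for_key(ksm, key, &slot);
	if (status != BLK_STS_OK)
		return status;
	/* ... issue the I/O, tagged with blk_ksm_get_slot_idx(slot) ... */
	blk_ksm_put_slot(slot);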
++ */ ++ ++#define pr_fmt(fmt) "blk-crypto: " fmt ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct blk_ksm_keyslot { ++ atomic_t slot_refs; ++ struct list_head idle_slot_node; ++ struct hlist_node hash_node; ++ const struct blk_crypto_key *key; ++ struct blk_keyslot_manager *ksm; ++}; ++ ++static inline void blk_ksm_hw_enter(struct blk_keyslot_manager *ksm) ++{ ++ /* ++ * Calling into the driver requires ksm->lock held and the device ++ * resumed. But we must resume the device first, since that can acquire ++ * and release ksm->lock via blk_ksm_reprogram_all_keys(). ++ */ ++ if (ksm->dev) ++ pm_runtime_get_sync(ksm->dev); ++ down_write(&ksm->lock); ++} ++ ++static inline void blk_ksm_hw_exit(struct blk_keyslot_manager *ksm) ++{ ++ up_write(&ksm->lock); ++ if (ksm->dev) ++ pm_runtime_put_sync(ksm->dev); ++} ++ ++static inline bool blk_ksm_is_passthrough(struct blk_keyslot_manager *ksm) ++{ ++ return ksm->num_slots == 0; ++} ++ ++/** ++ * blk_ksm_init() - Initialize a keyslot manager ++ * @ksm: The keyslot_manager to initialize. ++ * @num_slots: The number of key slots to manage. ++ * ++ * Allocate memory for keyslots and initialize a keyslot manager. Called by ++ * e.g. storage drivers to set up a keyslot manager in their request_queue. ++ * ++ * Return: 0 on success, or else a negative error code. ++ */ ++int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots) ++{ ++ unsigned int slot; ++ unsigned int i; ++ unsigned int slot_hashtable_size; ++ ++ memset(ksm, 0, sizeof(*ksm)); ++ ++ if (num_slots == 0) ++ return -EINVAL; ++ ++ ksm->slots = kvcalloc(num_slots, sizeof(ksm->slots[0]), GFP_KERNEL); ++ if (!ksm->slots) ++ return -ENOMEM; ++ ++ ksm->num_slots = num_slots; ++ ++ init_rwsem(&ksm->lock); ++ ++ init_waitqueue_head(&ksm->idle_slots_wait_queue); ++ INIT_LIST_HEAD(&ksm->idle_slots); ++ ++ for (slot = 0; slot < num_slots; slot++) { ++ ksm->slots[slot].ksm = ksm; ++ list_add_tail(&ksm->slots[slot].idle_slot_node, ++ &ksm->idle_slots); ++ } ++ ++ spin_lock_init(&ksm->idle_slots_lock); ++ ++ slot_hashtable_size = roundup_pow_of_two(num_slots); ++ /* ++ * hash_ptr() assumes bits != 0, so ensure the hash table has at least 2 ++ * buckets. This only makes a difference when there is only 1 keyslot. ++ */ ++ if (slot_hashtable_size < 2) ++ slot_hashtable_size = 2; ++ ++ ksm->log_slot_ht_size = ilog2(slot_hashtable_size); ++ ksm->slot_hashtable = kvmalloc_array(slot_hashtable_size, ++ sizeof(ksm->slot_hashtable[0]), ++ GFP_KERNEL); ++ if (!ksm->slot_hashtable) ++ goto err_destroy_ksm; ++ for (i = 0; i < slot_hashtable_size; i++) ++ INIT_HLIST_HEAD(&ksm->slot_hashtable[i]); ++ ++ return 0; ++ ++err_destroy_ksm: ++ blk_ksm_destroy(ksm); ++ return -ENOMEM; ++} ++EXPORT_SYMBOL_GPL(blk_ksm_init); ++ ++static void blk_ksm_destroy_callback(void *ksm) ++{ ++ blk_ksm_destroy(ksm); ++} ++ ++/** ++ * devm_blk_ksm_init() - Resource-managed blk_ksm_init() ++ * @dev: The device which owns the blk_keyslot_manager. ++ * @ksm: The blk_keyslot_manager to initialize. ++ * @num_slots: The number of key slots to manage. ++ * ++ * Like blk_ksm_init(), but causes blk_ksm_destroy() to be called automatically ++ * on driver detach. ++ * ++ * Return: 0 on success, or else a negative error code. 
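A worked illustration of the sizing logic in blk_ksm_init() above (not part of the patch): with num_slots = 32, roundup_pow_of_two() leaves the hash table at 32 buckets and log_slot_ht_size becomes 5; with num_slots = 1, the size rounds to 1 and is then forced up to 2 buckets (log_slot_ht_size = 1), so hash_ptr() is never asked for a zero-bit hash.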
++ */ ++int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm, ++ unsigned int num_slots) ++{ ++ int err = blk_ksm_init(ksm, num_slots); ++ ++ if (err) ++ return err; ++ ++ return devm_add_action_or_reset(dev, blk_ksm_destroy_callback, ksm); ++} ++EXPORT_SYMBOL_GPL(devm_blk_ksm_init); ++ ++static inline struct hlist_head * ++blk_ksm_hash_bucket_for_key(struct blk_keyslot_manager *ksm, ++ const struct blk_crypto_key *key) ++{ ++ return &ksm->slot_hashtable[hash_ptr(key, ksm->log_slot_ht_size)]; ++} ++ ++static void blk_ksm_remove_slot_from_lru_list(struct blk_ksm_keyslot *slot) ++{ ++ struct blk_keyslot_manager *ksm = slot->ksm; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ksm->idle_slots_lock, flags); ++ list_del(&slot->idle_slot_node); ++ spin_unlock_irqrestore(&ksm->idle_slots_lock, flags); ++} ++ ++static struct blk_ksm_keyslot *blk_ksm_find_keyslot( ++ struct blk_keyslot_manager *ksm, ++ const struct blk_crypto_key *key) ++{ ++ const struct hlist_head *head = blk_ksm_hash_bucket_for_key(ksm, key); ++ struct blk_ksm_keyslot *slotp; ++ ++ hlist_for_each_entry(slotp, head, hash_node) { ++ if (slotp->key == key) ++ return slotp; ++ } ++ return NULL; ++} ++ ++static struct blk_ksm_keyslot *blk_ksm_find_and_grab_keyslot( ++ struct blk_keyslot_manager *ksm, ++ const struct blk_crypto_key *key) ++{ ++ struct blk_ksm_keyslot *slot; ++ ++ slot = blk_ksm_find_keyslot(ksm, key); ++ if (!slot) ++ return NULL; ++ if (atomic_inc_return(&slot->slot_refs) == 1) { ++ /* Took first reference to this slot; remove it from LRU list */ ++ blk_ksm_remove_slot_from_lru_list(slot); ++ } ++ return slot; ++} ++ ++unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot) ++{ ++ return slot - slot->ksm->slots; ++} ++EXPORT_SYMBOL_GPL(blk_ksm_get_slot_idx); ++ ++/** ++ * blk_ksm_get_slot_for_key() - Program a key into a keyslot. ++ * @ksm: The keyslot manager to program the key into. ++ * @key: Pointer to the key object to program, including the raw key, crypto ++ * mode, and data unit size. ++ * @slot_ptr: A pointer to return the pointer of the allocated keyslot. ++ * ++ * Get a keyslot that's been programmed with the specified key. If one already ++ * exists, return it with incremented refcount. Otherwise, wait for a keyslot ++ * to become idle and program it. ++ * ++ * Context: Process context. Takes and releases ksm->lock. ++ * Return: BLK_STS_OK on success (and keyslot is set to the pointer of the ++ * allocated keyslot), or some other blk_status_t otherwise (and ++ * keyslot is set to NULL). ++ */ ++blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm, ++ const struct blk_crypto_key *key, ++ struct blk_ksm_keyslot **slot_ptr) ++{ ++ struct blk_ksm_keyslot *slot; ++ int slot_idx; ++ int err; ++ ++ *slot_ptr = NULL; ++ ++ if (blk_ksm_is_passthrough(ksm)) ++ return BLK_STS_OK; ++ ++ down_read(&ksm->lock); ++ slot = blk_ksm_find_and_grab_keyslot(ksm, key); ++ up_read(&ksm->lock); ++ if (slot) ++ goto success; ++ ++ for (;;) { ++ blk_ksm_hw_enter(ksm); ++ slot = blk_ksm_find_and_grab_keyslot(ksm, key); ++ if (slot) { ++ blk_ksm_hw_exit(ksm); ++ goto success; ++ } ++ ++ /* ++ * If we're here, that means there wasn't a slot that was ++ * already programmed with the key. So try to program it. 
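An illustrative aside, not part of the patch: the lookup is repeated here under the write-locked blk_ksm_hw_enter() because another task may have programmed the same key into a slot between the read-locked fast path above and this point; re-finding it avoids programming a second keyslot with a duplicate key.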
++ */ ++ if (!list_empty(&ksm->idle_slots)) ++ break; ++ ++ blk_ksm_hw_exit(ksm); ++ wait_event(ksm->idle_slots_wait_queue, ++ !list_empty(&ksm->idle_slots)); ++ } ++ ++ slot = list_first_entry(&ksm->idle_slots, struct blk_ksm_keyslot, ++ idle_slot_node); ++ slot_idx = blk_ksm_get_slot_idx(slot); ++ ++ err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot_idx); ++ if (err) { ++ wake_up(&ksm->idle_slots_wait_queue); ++ blk_ksm_hw_exit(ksm); ++ return errno_to_blk_status(err); ++ } ++ ++ /* Move this slot to the hash list for the new key. */ ++ if (slot->key) ++ hlist_del(&slot->hash_node); ++ slot->key = key; ++ hlist_add_head(&slot->hash_node, blk_ksm_hash_bucket_for_key(ksm, key)); ++ ++ atomic_set(&slot->slot_refs, 1); ++ ++ blk_ksm_remove_slot_from_lru_list(slot); ++ ++ blk_ksm_hw_exit(ksm); ++success: ++ *slot_ptr = slot; ++ return BLK_STS_OK; ++} ++ ++/** ++ * blk_ksm_put_slot() - Release a reference to a slot ++ * @slot: The keyslot to release the reference of. ++ * ++ * Context: Any context. ++ */ ++void blk_ksm_put_slot(struct blk_ksm_keyslot *slot) ++{ ++ struct blk_keyslot_manager *ksm; ++ unsigned long flags; ++ ++ if (!slot) ++ return; ++ ++ ksm = slot->ksm; ++ ++ if (atomic_dec_and_lock_irqsave(&slot->slot_refs, ++ &ksm->idle_slots_lock, flags)) { ++ list_add_tail(&slot->idle_slot_node, &ksm->idle_slots); ++ spin_unlock_irqrestore(&ksm->idle_slots_lock, flags); ++ wake_up(&ksm->idle_slots_wait_queue); ++ } ++} ++ ++/** ++ * blk_ksm_crypto_cfg_supported() - Find out if a crypto configuration is ++ * supported by a ksm. ++ * @ksm: The keyslot manager to check ++ * @cfg: The crypto configuration to check for. ++ * ++ * Checks for crypto_mode/data unit size/dun bytes support. ++ * ++ * Return: Whether or not this ksm supports the specified crypto config. ++ */ ++bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm, ++ const struct blk_crypto_config *cfg) ++{ ++ if (!ksm) ++ return false; ++ if (!(ksm->crypto_modes_supported[cfg->crypto_mode] & ++ cfg->data_unit_size)) ++ return false; ++ if (ksm->max_dun_bytes_supported < cfg->dun_bytes) ++ return false; ++ return true; ++} ++ ++/** ++ * blk_ksm_evict_key() - Evict a key from the lower layer device. ++ * @ksm: The keyslot manager to evict from ++ * @key: The key to evict ++ * ++ * Find the keyslot that the specified key was programmed into, and evict that ++ * slot from the lower layer device. The slot must not be in use by any ++ * in-flight IO when this function is called. ++ * ++ * Context: Process context. Takes and releases ksm->lock. ++ * Return: 0 on success or if there's no keyslot with the specified key, -EBUSY ++ * if the keyslot is still in use, or another -errno value on other ++ * error. 
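For illustration, not part of the patch: given a ksm with crypto_modes_supported[BLK_ENCRYPTION_MODE_AES_256_XTS] = 0x1000 (4096-byte data units only) and max_dun_bytes_supported = 8, blk_ksm_crypto_cfg_supported() above accepts

	struct blk_crypto_config cfg = {
		.crypto_mode	= BLK_ENCRYPTION_MODE_AES_256_XTS,
		.data_unit_size	= 4096,	/* == 0x1000, set in the mask */
		.dun_bytes	= 8,
	};

but rejects the same configuration with data_unit_size = 512 (bit not set in the mask) or with dun_bytes = 16 (exceeds the maximum).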
++ */ ++int blk_ksm_evict_key(struct blk_keyslot_manager *ksm, ++ const struct blk_crypto_key *key) ++{ ++ struct blk_ksm_keyslot *slot; ++ int err = 0; ++ ++ if (blk_ksm_is_passthrough(ksm)) { ++ if (ksm->ksm_ll_ops.keyslot_evict) { ++ blk_ksm_hw_enter(ksm); ++ err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, -1); ++ blk_ksm_hw_exit(ksm); ++ return err; ++ } ++ return 0; ++ } ++ ++ blk_ksm_hw_enter(ksm); ++ slot = blk_ksm_find_keyslot(ksm, key); ++ if (!slot) ++ goto out_unlock; ++ ++ if (WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)) { ++ err = -EBUSY; ++ goto out_unlock; ++ } ++ err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, ++ blk_ksm_get_slot_idx(slot)); ++ if (err) ++ goto out_unlock; ++ ++ hlist_del(&slot->hash_node); ++ slot->key = NULL; ++ err = 0; ++out_unlock: ++ blk_ksm_hw_exit(ksm); ++ return err; ++} ++ ++/** ++ * blk_ksm_reprogram_all_keys() - Re-program all keyslots. ++ * @ksm: The keyslot manager ++ * ++ * Re-program all keyslots that are supposed to have a key programmed. This is ++ * intended only for use by drivers for hardware that loses its keys on reset. ++ * ++ * Context: Process context. Takes and releases ksm->lock. ++ */ ++void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm) ++{ ++ unsigned int slot; ++ ++ if (blk_ksm_is_passthrough(ksm)) ++ return; ++ ++ /* This is for device initialization, so don't resume the device */ ++ down_write(&ksm->lock); ++ for (slot = 0; slot < ksm->num_slots; slot++) { ++ const struct blk_crypto_key *key = ksm->slots[slot].key; ++ int err; ++ ++ if (!key) ++ continue; ++ ++ err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot); ++ WARN_ON(err); ++ } ++ up_write(&ksm->lock); ++} ++EXPORT_SYMBOL_GPL(blk_ksm_reprogram_all_keys); ++ ++void blk_ksm_destroy(struct blk_keyslot_manager *ksm) ++{ ++ if (!ksm) ++ return; ++ kvfree(ksm->slot_hashtable); ++ kvfree_sensitive(ksm->slots, sizeof(ksm->slots[0]) * ksm->num_slots); ++ memzero_explicit(ksm, sizeof(*ksm)); ++} ++EXPORT_SYMBOL_GPL(blk_ksm_destroy); ++ ++bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q) ++{ ++ if (blk_integrity_queue_supports_integrity(q)) { ++ pr_warn("Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n"); ++ return false; ++ } ++ q->ksm = ksm; ++ return true; ++} ++EXPORT_SYMBOL_GPL(blk_ksm_register); ++ ++void blk_ksm_unregister(struct request_queue *q) ++{ ++ q->ksm = NULL; ++} ++ ++/** ++ * blk_ksm_intersect_modes() - restrict supported modes by child device ++ * @parent: The keyslot manager for parent device ++ * @child: The keyslot manager for child device, or NULL ++ * ++ * Clear any crypto mode support bits in @parent that aren't set in @child. ++ * If @child is NULL, then all parent bits are cleared. ++ * ++ * Only use this when setting up the keyslot manager for a layered device, ++ * before it's been exposed yet. 
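For illustration, not part of the patch: a layered driver builds its passthrough keyslot manager by starting fully capable and intersecting in each underlying device, roughly as below. The layered_dev and for_each_child() names are hypothetical; device-mapper's real version of this lives in dm-table.c.

	struct blk_keyslot_manager *ksm = &layered_dev->ksm;

	blk_ksm_init_passthrough(ksm);
	ksm->max_dun_bytes_supported = UINT_MAX;
	memset(ksm->crypto_modes_supported, 0xFF,
	       sizeof(ksm->crypto_modes_supported));

	for_each_child(layered_dev, child)
		blk_ksm_intersect_modes(ksm, child->queue->ksm);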
++ */ ++void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent, ++ const struct blk_keyslot_manager *child) ++{ ++ if (child) { ++ unsigned int i; ++ ++ parent->max_dun_bytes_supported = ++ min(parent->max_dun_bytes_supported, ++ child->max_dun_bytes_supported); ++ for (i = 0; i < ARRAY_SIZE(child->crypto_modes_supported); ++ i++) { ++ parent->crypto_modes_supported[i] &= ++ child->crypto_modes_supported[i]; ++ } ++ } else { ++ parent->max_dun_bytes_supported = 0; ++ memset(parent->crypto_modes_supported, 0, ++ sizeof(parent->crypto_modes_supported)); ++ } ++} ++EXPORT_SYMBOL_GPL(blk_ksm_intersect_modes); ++ ++/** ++ * blk_ksm_is_superset() - Check if a KSM supports a superset of crypto modes ++ * and DUN bytes that another KSM supports. Here, ++ * "superset" refers to the mathematical meaning of the ++ * word - i.e. if two KSMs have the *same* capabilities, ++ * they *are* considered supersets of each other. ++ * @ksm_superset: The KSM that we want to verify is a superset ++ * @ksm_subset: The KSM that we want to verify is a subset ++ * ++ * Return: True if @ksm_superset supports a superset of the crypto modes and DUN ++ * bytes that @ksm_subset supports. ++ */ ++bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset, ++ struct blk_keyslot_manager *ksm_subset) ++{ ++ int i; ++ ++ if (!ksm_subset) ++ return true; ++ ++ if (!ksm_superset) ++ return false; ++ ++ for (i = 0; i < ARRAY_SIZE(ksm_superset->crypto_modes_supported); i++) { ++ if (ksm_subset->crypto_modes_supported[i] & ++ (~ksm_superset->crypto_modes_supported[i])) { ++ return false; ++ } ++ } ++ ++ if (ksm_subset->max_dun_bytes_supported > ++ ksm_superset->max_dun_bytes_supported) { ++ return false; ++ } ++ ++ return true; ++} ++EXPORT_SYMBOL_GPL(blk_ksm_is_superset); ++ ++/** ++ * blk_ksm_update_capabilities() - Update the restrictions of a KSM to those of ++ * another KSM ++ * @target_ksm: The KSM whose restrictions to update. ++ * @reference_ksm: The KSM to whose restrictions this function will update ++ * @target_ksm's restrictions to. ++ * ++ * Blk-crypto requires that crypto capabilities that were ++ * advertised when a bio was created continue to be supported by the ++ * device until that bio is ended. This is turn means that a device cannot ++ * shrink its advertised crypto capabilities without any explicit ++ * synchronization with upper layers. So if there's no such explicit ++ * synchronization, @reference_ksm must support all the crypto capabilities that ++ * @target_ksm does ++ * (i.e. we need blk_ksm_is_superset(@reference_ksm, @target_ksm) == true). ++ * ++ * Note also that as long as the crypto capabilities are being expanded, the ++ * order of updates becoming visible is not important because it's alright ++ * for blk-crypto to see stale values - they only cause blk-crypto to ++ * believe that a crypto capability isn't supported when it actually is (which ++ * might result in blk-crypto-fallback being used if available, or the bio being ++ * failed). 
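For illustration, not part of the patch: the invariant described above means that a caller expanding a queue's capabilities typically verifies the superset relation first, along these lines (the error handling is hypothetical):

	if (!blk_ksm_is_superset(new_ksm, q->ksm))
		return -EINVAL;	/* would shrink the advertised capabilities */

	blk_ksm_update_capabilities(q->ksm, new_ksm);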
++ */ ++void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm, ++ struct blk_keyslot_manager *reference_ksm) ++{ ++ memcpy(target_ksm->crypto_modes_supported, ++ reference_ksm->crypto_modes_supported, ++ sizeof(target_ksm->crypto_modes_supported)); ++ ++ target_ksm->max_dun_bytes_supported = ++ reference_ksm->max_dun_bytes_supported; ++} ++EXPORT_SYMBOL_GPL(blk_ksm_update_capabilities); ++ ++/** ++ * blk_ksm_init_passthrough() - Init a passthrough keyslot manager ++ * @ksm: The keyslot manager to init ++ * ++ * Initialize a passthrough keyslot manager. ++ * Called by e.g. storage drivers to set up a keyslot manager in their ++ * request_queue, when the storage driver wants to manage its keys by itself. ++ * This is useful for inline encryption hardware that doesn't have the concept ++ * of keyslots, and for layered devices. ++ */ ++void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm) ++{ ++ memset(ksm, 0, sizeof(*ksm)); ++ init_rwsem(&ksm->lock); ++} ++EXPORT_SYMBOL_GPL(blk_ksm_init_passthrough); +diff --git a/block/blk-crypto.c b/block/blk-crypto.c +index 8f53f4a1f9e2..76ce7a5d2676 100644 +--- a/block/blk-crypto.c ++++ b/block/blk-crypto.c +@@ -11,7 +11,7 @@ + + #include + #include +-#include ++#include + #include + #include + +diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c +deleted file mode 100644 +index 1792159d12d1..000000000000 +--- a/block/keyslot-manager.c ++++ /dev/null +@@ -1,579 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0 +-/* +- * Copyright 2019 Google LLC +- */ +- +-/** +- * DOC: The Keyslot Manager +- * +- * Many devices with inline encryption support have a limited number of "slots" +- * into which encryption contexts may be programmed, and requests can be tagged +- * with a slot number to specify the key to use for en/decryption. +- * +- * As the number of slots is limited, and programming keys is expensive on +- * many inline encryption hardware, we don't want to program the same key into +- * multiple slots - if multiple requests are using the same key, we want to +- * program just one slot with that key and use that slot for all requests. +- * +- * The keyslot manager manages these keyslots appropriately, and also acts as +- * an abstraction between the inline encryption hardware and the upper layers. +- * +- * Lower layer devices will set up a keyslot manager in their request queue +- * and tell it how to perform device specific operations like programming/ +- * evicting keys from keyslots. +- * +- * Upper layers will call blk_ksm_get_slot_for_key() to program a +- * key into some slot in the inline encryption hardware. +- */ +- +-#define pr_fmt(fmt) "blk-crypto: " fmt +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-struct blk_ksm_keyslot { +- atomic_t slot_refs; +- struct list_head idle_slot_node; +- struct hlist_node hash_node; +- const struct blk_crypto_key *key; +- struct blk_keyslot_manager *ksm; +-}; +- +-static inline void blk_ksm_hw_enter(struct blk_keyslot_manager *ksm) +-{ +- /* +- * Calling into the driver requires ksm->lock held and the device +- * resumed. But we must resume the device first, since that can acquire +- * and release ksm->lock via blk_ksm_reprogram_all_keys(). 
+- */ +- if (ksm->dev) +- pm_runtime_get_sync(ksm->dev); +- down_write(&ksm->lock); +-} +- +-static inline void blk_ksm_hw_exit(struct blk_keyslot_manager *ksm) +-{ +- up_write(&ksm->lock); +- if (ksm->dev) +- pm_runtime_put_sync(ksm->dev); +-} +- +-static inline bool blk_ksm_is_passthrough(struct blk_keyslot_manager *ksm) +-{ +- return ksm->num_slots == 0; +-} +- +-/** +- * blk_ksm_init() - Initialize a keyslot manager +- * @ksm: The keyslot_manager to initialize. +- * @num_slots: The number of key slots to manage. +- * +- * Allocate memory for keyslots and initialize a keyslot manager. Called by +- * e.g. storage drivers to set up a keyslot manager in their request_queue. +- * +- * Return: 0 on success, or else a negative error code. +- */ +-int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots) +-{ +- unsigned int slot; +- unsigned int i; +- unsigned int slot_hashtable_size; +- +- memset(ksm, 0, sizeof(*ksm)); +- +- if (num_slots == 0) +- return -EINVAL; +- +- ksm->slots = kvcalloc(num_slots, sizeof(ksm->slots[0]), GFP_KERNEL); +- if (!ksm->slots) +- return -ENOMEM; +- +- ksm->num_slots = num_slots; +- +- init_rwsem(&ksm->lock); +- +- init_waitqueue_head(&ksm->idle_slots_wait_queue); +- INIT_LIST_HEAD(&ksm->idle_slots); +- +- for (slot = 0; slot < num_slots; slot++) { +- ksm->slots[slot].ksm = ksm; +- list_add_tail(&ksm->slots[slot].idle_slot_node, +- &ksm->idle_slots); +- } +- +- spin_lock_init(&ksm->idle_slots_lock); +- +- slot_hashtable_size = roundup_pow_of_two(num_slots); +- /* +- * hash_ptr() assumes bits != 0, so ensure the hash table has at least 2 +- * buckets. This only makes a difference when there is only 1 keyslot. +- */ +- if (slot_hashtable_size < 2) +- slot_hashtable_size = 2; +- +- ksm->log_slot_ht_size = ilog2(slot_hashtable_size); +- ksm->slot_hashtable = kvmalloc_array(slot_hashtable_size, +- sizeof(ksm->slot_hashtable[0]), +- GFP_KERNEL); +- if (!ksm->slot_hashtable) +- goto err_destroy_ksm; +- for (i = 0; i < slot_hashtable_size; i++) +- INIT_HLIST_HEAD(&ksm->slot_hashtable[i]); +- +- return 0; +- +-err_destroy_ksm: +- blk_ksm_destroy(ksm); +- return -ENOMEM; +-} +-EXPORT_SYMBOL_GPL(blk_ksm_init); +- +-static void blk_ksm_destroy_callback(void *ksm) +-{ +- blk_ksm_destroy(ksm); +-} +- +-/** +- * devm_blk_ksm_init() - Resource-managed blk_ksm_init() +- * @dev: The device which owns the blk_keyslot_manager. +- * @ksm: The blk_keyslot_manager to initialize. +- * @num_slots: The number of key slots to manage. +- * +- * Like blk_ksm_init(), but causes blk_ksm_destroy() to be called automatically +- * on driver detach. +- * +- * Return: 0 on success, or else a negative error code. 
+- */ +-int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm, +- unsigned int num_slots) +-{ +- int err = blk_ksm_init(ksm, num_slots); +- +- if (err) +- return err; +- +- return devm_add_action_or_reset(dev, blk_ksm_destroy_callback, ksm); +-} +-EXPORT_SYMBOL_GPL(devm_blk_ksm_init); +- +-static inline struct hlist_head * +-blk_ksm_hash_bucket_for_key(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key) +-{ +- return &ksm->slot_hashtable[hash_ptr(key, ksm->log_slot_ht_size)]; +-} +- +-static void blk_ksm_remove_slot_from_lru_list(struct blk_ksm_keyslot *slot) +-{ +- struct blk_keyslot_manager *ksm = slot->ksm; +- unsigned long flags; +- +- spin_lock_irqsave(&ksm->idle_slots_lock, flags); +- list_del(&slot->idle_slot_node); +- spin_unlock_irqrestore(&ksm->idle_slots_lock, flags); +-} +- +-static struct blk_ksm_keyslot *blk_ksm_find_keyslot( +- struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key) +-{ +- const struct hlist_head *head = blk_ksm_hash_bucket_for_key(ksm, key); +- struct blk_ksm_keyslot *slotp; +- +- hlist_for_each_entry(slotp, head, hash_node) { +- if (slotp->key == key) +- return slotp; +- } +- return NULL; +-} +- +-static struct blk_ksm_keyslot *blk_ksm_find_and_grab_keyslot( +- struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key) +-{ +- struct blk_ksm_keyslot *slot; +- +- slot = blk_ksm_find_keyslot(ksm, key); +- if (!slot) +- return NULL; +- if (atomic_inc_return(&slot->slot_refs) == 1) { +- /* Took first reference to this slot; remove it from LRU list */ +- blk_ksm_remove_slot_from_lru_list(slot); +- } +- return slot; +-} +- +-unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot) +-{ +- return slot - slot->ksm->slots; +-} +-EXPORT_SYMBOL_GPL(blk_ksm_get_slot_idx); +- +-/** +- * blk_ksm_get_slot_for_key() - Program a key into a keyslot. +- * @ksm: The keyslot manager to program the key into. +- * @key: Pointer to the key object to program, including the raw key, crypto +- * mode, and data unit size. +- * @slot_ptr: A pointer to return the pointer of the allocated keyslot. +- * +- * Get a keyslot that's been programmed with the specified key. If one already +- * exists, return it with incremented refcount. Otherwise, wait for a keyslot +- * to become idle and program it. +- * +- * Context: Process context. Takes and releases ksm->lock. +- * Return: BLK_STS_OK on success (and keyslot is set to the pointer of the +- * allocated keyslot), or some other blk_status_t otherwise (and +- * keyslot is set to NULL). +- */ +-blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key, +- struct blk_ksm_keyslot **slot_ptr) +-{ +- struct blk_ksm_keyslot *slot; +- int slot_idx; +- int err; +- +- *slot_ptr = NULL; +- +- if (blk_ksm_is_passthrough(ksm)) +- return BLK_STS_OK; +- +- down_read(&ksm->lock); +- slot = blk_ksm_find_and_grab_keyslot(ksm, key); +- up_read(&ksm->lock); +- if (slot) +- goto success; +- +- for (;;) { +- blk_ksm_hw_enter(ksm); +- slot = blk_ksm_find_and_grab_keyslot(ksm, key); +- if (slot) { +- blk_ksm_hw_exit(ksm); +- goto success; +- } +- +- /* +- * If we're here, that means there wasn't a slot that was +- * already programmed with the key. So try to program it. 
+- */ +- if (!list_empty(&ksm->idle_slots)) +- break; +- +- blk_ksm_hw_exit(ksm); +- wait_event(ksm->idle_slots_wait_queue, +- !list_empty(&ksm->idle_slots)); +- } +- +- slot = list_first_entry(&ksm->idle_slots, struct blk_ksm_keyslot, +- idle_slot_node); +- slot_idx = blk_ksm_get_slot_idx(slot); +- +- err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot_idx); +- if (err) { +- wake_up(&ksm->idle_slots_wait_queue); +- blk_ksm_hw_exit(ksm); +- return errno_to_blk_status(err); +- } +- +- /* Move this slot to the hash list for the new key. */ +- if (slot->key) +- hlist_del(&slot->hash_node); +- slot->key = key; +- hlist_add_head(&slot->hash_node, blk_ksm_hash_bucket_for_key(ksm, key)); +- +- atomic_set(&slot->slot_refs, 1); +- +- blk_ksm_remove_slot_from_lru_list(slot); +- +- blk_ksm_hw_exit(ksm); +-success: +- *slot_ptr = slot; +- return BLK_STS_OK; +-} +- +-/** +- * blk_ksm_put_slot() - Release a reference to a slot +- * @slot: The keyslot to release the reference of. +- * +- * Context: Any context. +- */ +-void blk_ksm_put_slot(struct blk_ksm_keyslot *slot) +-{ +- struct blk_keyslot_manager *ksm; +- unsigned long flags; +- +- if (!slot) +- return; +- +- ksm = slot->ksm; +- +- if (atomic_dec_and_lock_irqsave(&slot->slot_refs, +- &ksm->idle_slots_lock, flags)) { +- list_add_tail(&slot->idle_slot_node, &ksm->idle_slots); +- spin_unlock_irqrestore(&ksm->idle_slots_lock, flags); +- wake_up(&ksm->idle_slots_wait_queue); +- } +-} +- +-/** +- * blk_ksm_crypto_cfg_supported() - Find out if a crypto configuration is +- * supported by a ksm. +- * @ksm: The keyslot manager to check +- * @cfg: The crypto configuration to check for. +- * +- * Checks for crypto_mode/data unit size/dun bytes support. +- * +- * Return: Whether or not this ksm supports the specified crypto config. +- */ +-bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_config *cfg) +-{ +- if (!ksm) +- return false; +- if (!(ksm->crypto_modes_supported[cfg->crypto_mode] & +- cfg->data_unit_size)) +- return false; +- if (ksm->max_dun_bytes_supported < cfg->dun_bytes) +- return false; +- return true; +-} +- +-/** +- * blk_ksm_evict_key() - Evict a key from the lower layer device. +- * @ksm: The keyslot manager to evict from +- * @key: The key to evict +- * +- * Find the keyslot that the specified key was programmed into, and evict that +- * slot from the lower layer device. The slot must not be in use by any +- * in-flight IO when this function is called. +- * +- * Context: Process context. Takes and releases ksm->lock. +- * Return: 0 on success or if there's no keyslot with the specified key, -EBUSY +- * if the keyslot is still in use, or another -errno value on other +- * error. 
+- */ +-int blk_ksm_evict_key(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key) +-{ +- struct blk_ksm_keyslot *slot; +- int err = 0; +- +- if (blk_ksm_is_passthrough(ksm)) { +- if (ksm->ksm_ll_ops.keyslot_evict) { +- blk_ksm_hw_enter(ksm); +- err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, -1); +- blk_ksm_hw_exit(ksm); +- return err; +- } +- return 0; +- } +- +- blk_ksm_hw_enter(ksm); +- slot = blk_ksm_find_keyslot(ksm, key); +- if (!slot) +- goto out_unlock; +- +- if (WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)) { +- err = -EBUSY; +- goto out_unlock; +- } +- err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, +- blk_ksm_get_slot_idx(slot)); +- if (err) +- goto out_unlock; +- +- hlist_del(&slot->hash_node); +- slot->key = NULL; +- err = 0; +-out_unlock: +- blk_ksm_hw_exit(ksm); +- return err; +-} +- +-/** +- * blk_ksm_reprogram_all_keys() - Re-program all keyslots. +- * @ksm: The keyslot manager +- * +- * Re-program all keyslots that are supposed to have a key programmed. This is +- * intended only for use by drivers for hardware that loses its keys on reset. +- * +- * Context: Process context. Takes and releases ksm->lock. +- */ +-void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm) +-{ +- unsigned int slot; +- +- if (blk_ksm_is_passthrough(ksm)) +- return; +- +- /* This is for device initialization, so don't resume the device */ +- down_write(&ksm->lock); +- for (slot = 0; slot < ksm->num_slots; slot++) { +- const struct blk_crypto_key *key = ksm->slots[slot].key; +- int err; +- +- if (!key) +- continue; +- +- err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot); +- WARN_ON(err); +- } +- up_write(&ksm->lock); +-} +-EXPORT_SYMBOL_GPL(blk_ksm_reprogram_all_keys); +- +-void blk_ksm_destroy(struct blk_keyslot_manager *ksm) +-{ +- if (!ksm) +- return; +- kvfree(ksm->slot_hashtable); +- kvfree_sensitive(ksm->slots, sizeof(ksm->slots[0]) * ksm->num_slots); +- memzero_explicit(ksm, sizeof(*ksm)); +-} +-EXPORT_SYMBOL_GPL(blk_ksm_destroy); +- +-bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q) +-{ +- if (blk_integrity_queue_supports_integrity(q)) { +- pr_warn("Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n"); +- return false; +- } +- q->ksm = ksm; +- return true; +-} +-EXPORT_SYMBOL_GPL(blk_ksm_register); +- +-void blk_ksm_unregister(struct request_queue *q) +-{ +- q->ksm = NULL; +-} +- +-/** +- * blk_ksm_intersect_modes() - restrict supported modes by child device +- * @parent: The keyslot manager for parent device +- * @child: The keyslot manager for child device, or NULL +- * +- * Clear any crypto mode support bits in @parent that aren't set in @child. +- * If @child is NULL, then all parent bits are cleared. +- * +- * Only use this when setting up the keyslot manager for a layered device, +- * before it's been exposed yet. 
+- */ +-void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent, +- const struct blk_keyslot_manager *child) +-{ +- if (child) { +- unsigned int i; +- +- parent->max_dun_bytes_supported = +- min(parent->max_dun_bytes_supported, +- child->max_dun_bytes_supported); +- for (i = 0; i < ARRAY_SIZE(child->crypto_modes_supported); +- i++) { +- parent->crypto_modes_supported[i] &= +- child->crypto_modes_supported[i]; +- } +- } else { +- parent->max_dun_bytes_supported = 0; +- memset(parent->crypto_modes_supported, 0, +- sizeof(parent->crypto_modes_supported)); +- } +-} +-EXPORT_SYMBOL_GPL(blk_ksm_intersect_modes); +- +-/** +- * blk_ksm_is_superset() - Check if a KSM supports a superset of crypto modes +- * and DUN bytes that another KSM supports. Here, +- * "superset" refers to the mathematical meaning of the +- * word - i.e. if two KSMs have the *same* capabilities, +- * they *are* considered supersets of each other. +- * @ksm_superset: The KSM that we want to verify is a superset +- * @ksm_subset: The KSM that we want to verify is a subset +- * +- * Return: True if @ksm_superset supports a superset of the crypto modes and DUN +- * bytes that @ksm_subset supports. +- */ +-bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset, +- struct blk_keyslot_manager *ksm_subset) +-{ +- int i; +- +- if (!ksm_subset) +- return true; +- +- if (!ksm_superset) +- return false; +- +- for (i = 0; i < ARRAY_SIZE(ksm_superset->crypto_modes_supported); i++) { +- if (ksm_subset->crypto_modes_supported[i] & +- (~ksm_superset->crypto_modes_supported[i])) { +- return false; +- } +- } +- +- if (ksm_subset->max_dun_bytes_supported > +- ksm_superset->max_dun_bytes_supported) { +- return false; +- } +- +- return true; +-} +-EXPORT_SYMBOL_GPL(blk_ksm_is_superset); +- +-/** +- * blk_ksm_update_capabilities() - Update the restrictions of a KSM to those of +- * another KSM +- * @target_ksm: The KSM whose restrictions to update. +- * @reference_ksm: The KSM to whose restrictions this function will update +- * @target_ksm's restrictions to. +- * +- * Blk-crypto requires that crypto capabilities that were +- * advertised when a bio was created continue to be supported by the +- * device until that bio is ended. This is turn means that a device cannot +- * shrink its advertised crypto capabilities without any explicit +- * synchronization with upper layers. So if there's no such explicit +- * synchronization, @reference_ksm must support all the crypto capabilities that +- * @target_ksm does +- * (i.e. we need blk_ksm_is_superset(@reference_ksm, @target_ksm) == true). +- * +- * Note also that as long as the crypto capabilities are being expanded, the +- * order of updates becoming visible is not important because it's alright +- * for blk-crypto to see stale values - they only cause blk-crypto to +- * believe that a crypto capability isn't supported when it actually is (which +- * might result in blk-crypto-fallback being used if available, or the bio being +- * failed). 
+- */
+-void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm,
+-				struct blk_keyslot_manager *reference_ksm)
+-{
+-	memcpy(target_ksm->crypto_modes_supported,
+-	       reference_ksm->crypto_modes_supported,
+-	       sizeof(target_ksm->crypto_modes_supported));
+-
+-	target_ksm->max_dun_bytes_supported =
+-		reference_ksm->max_dun_bytes_supported;
+-}
+-EXPORT_SYMBOL_GPL(blk_ksm_update_capabilities);
+-
+-/**
+- * blk_ksm_init_passthrough() - Init a passthrough keyslot manager
+- * @ksm: The keyslot manager to init
+- *
+- * Initialize a passthrough keyslot manager.
+- * Called by e.g. storage drivers to set up a keyslot manager in their
+- * request_queue, when the storage driver wants to manage its keys by itself.
+- * This is useful for inline encryption hardware that doesn't have the concept
+- * of keyslots, and for layered devices.
+- */
+-void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm)
+-{
+-	memset(ksm, 0, sizeof(*ksm));
+-	init_rwsem(&ksm->lock);
+-}
+-EXPORT_SYMBOL_GPL(blk_ksm_init_passthrough);
+diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
+index 55dccdfbcb22..841ed87999e7 100644
+--- a/drivers/md/dm-core.h
++++ b/drivers/md/dm-core.h
+@@ -13,7 +13,7 @@
+ #include
+ #include
+ #include
+-#include <linux/keyslot-manager.h>
++#include <linux/blk-crypto-profile.h>
+ 
+ #include
+ 
+diff --git a/drivers/md/dm.c b/drivers/md/dm.c
+index 7870e6460633..4184fd8ccb08 100644
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -29,7 +29,7 @@
+ #include
+ #include
+ #include
+-#include <linux/keyslot-manager.h>
++#include <linux/blk-crypto-profile.h>
+ 
+ #define DM_MSG_PREFIX "core"
+ 
+diff --git a/drivers/mmc/host/cqhci-crypto.c b/drivers/mmc/host/cqhci-crypto.c
+index 6419cfbb4ab7..628bbfaf8312 100644
+--- a/drivers/mmc/host/cqhci-crypto.c
++++ b/drivers/mmc/host/cqhci-crypto.c
+@@ -6,7 +6,7 @@
+  */
+ 
+ #include
+-#include <linux/keyslot-manager.h>
++#include <linux/blk-crypto-profile.h>
+ #include
+ 
+ #include "cqhci-crypto.h"
+diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
+index 41f6e06f9185..885fcf2e5922 100644
+--- a/drivers/scsi/ufs/ufshcd.h
++++ b/drivers/scsi/ufs/ufshcd.h
+@@ -32,7 +32,7 @@
+ #include
+ #include
+ #include
+-#include <linux/keyslot-manager.h>
++#include <linux/blk-crypto-profile.h>
+ #include "unipro.h"
+ 
+ #include
+
+diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h
+new file mode 100644
+index 000000000000..a27605e2f826
+--- /dev/null
++++ b/include/linux/blk-crypto-profile.h
+@@ -0,0 +1,120 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright 2019 Google LLC
++ */
++
++#ifndef __LINUX_KEYSLOT_MANAGER_H
++#define __LINUX_KEYSLOT_MANAGER_H
++
++#include <linux/bio.h>
++#include <linux/blk-crypto.h>
++
++struct blk_keyslot_manager;
++
++/**
++ * struct blk_ksm_ll_ops - functions to manage keyslots in hardware
++ * @keyslot_program:	Program the specified key into the specified slot in the
++ *			inline encryption hardware.
++ * @keyslot_evict:	Evict key from the specified keyslot in the hardware.
++ *			The key is provided so that e.g. dm layers can evict
++ *			keys from the devices that they map over.
++ *			Returns 0 on success, -errno otherwise.
++ *
++ * This structure should be provided by storage device drivers when they set up
++ * a keyslot manager - this structure holds the function ptrs that the keyslot
++ * manager will use to manipulate keyslots in the hardware.
++ */ ++struct blk_ksm_ll_ops { ++ int (*keyslot_program)(struct blk_keyslot_manager *ksm, ++ const struct blk_crypto_key *key, ++ unsigned int slot); ++ int (*keyslot_evict)(struct blk_keyslot_manager *ksm, ++ const struct blk_crypto_key *key, ++ unsigned int slot); ++}; ++ ++struct blk_keyslot_manager { ++ /* ++ * The struct blk_ksm_ll_ops that this keyslot manager will use ++ * to perform operations like programming and evicting keys on the ++ * device ++ */ ++ struct blk_ksm_ll_ops ksm_ll_ops; ++ ++ /* ++ * The maximum number of bytes supported for specifying the data unit ++ * number. ++ */ ++ unsigned int max_dun_bytes_supported; ++ ++ /* ++ * Array of size BLK_ENCRYPTION_MODE_MAX of bitmasks that represents ++ * whether a crypto mode and data unit size are supported. The i'th ++ * bit of crypto_mode_supported[crypto_mode] is set iff a data unit ++ * size of (1 << i) is supported. We only support data unit sizes ++ * that are powers of 2. ++ */ ++ unsigned int crypto_modes_supported[BLK_ENCRYPTION_MODE_MAX]; ++ ++ /* Device for runtime power management (NULL if none) */ ++ struct device *dev; ++ ++ /* Here onwards are *private* fields for internal keyslot manager use */ ++ ++ unsigned int num_slots; ++ ++ /* Protects programming and evicting keys from the device */ ++ struct rw_semaphore lock; ++ ++ /* List of idle slots, with least recently used slot at front */ ++ wait_queue_head_t idle_slots_wait_queue; ++ struct list_head idle_slots; ++ spinlock_t idle_slots_lock; ++ ++ /* ++ * Hash table which maps struct *blk_crypto_key to keyslots, so that we ++ * can find a key's keyslot in O(1) time rather than O(num_slots). ++ * Protected by 'lock'. ++ */ ++ struct hlist_head *slot_hashtable; ++ unsigned int log_slot_ht_size; ++ ++ /* Per-keyslot data */ ++ struct blk_ksm_keyslot *slots; ++}; ++ ++int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots); ++ ++int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm, ++ unsigned int num_slots); ++ ++blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm, ++ const struct blk_crypto_key *key, ++ struct blk_ksm_keyslot **slot_ptr); ++ ++unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot); ++ ++void blk_ksm_put_slot(struct blk_ksm_keyslot *slot); ++ ++bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm, ++ const struct blk_crypto_config *cfg); ++ ++int blk_ksm_evict_key(struct blk_keyslot_manager *ksm, ++ const struct blk_crypto_key *key); ++ ++void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm); ++ ++void blk_ksm_destroy(struct blk_keyslot_manager *ksm); ++ ++void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent, ++ const struct blk_keyslot_manager *child); ++ ++void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm); ++ ++bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset, ++ struct blk_keyslot_manager *ksm_subset); ++ ++void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm, ++ struct blk_keyslot_manager *reference_ksm); ++ ++#endif /* __LINUX_KEYSLOT_MANAGER_H */ +diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h +deleted file mode 100644 +index a27605e2f826..000000000000 +--- a/include/linux/keyslot-manager.h ++++ /dev/null +@@ -1,120 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 */ +-/* +- * Copyright 2019 Google LLC +- */ +- +-#ifndef __LINUX_KEYSLOT_MANAGER_H +-#define __LINUX_KEYSLOT_MANAGER_H +- +-#include +-#include +- +-struct blk_keyslot_manager; +- +-/** +- * struct 
blk_ksm_ll_ops - functions to manage keyslots in hardware +- * @keyslot_program: Program the specified key into the specified slot in the +- * inline encryption hardware. +- * @keyslot_evict: Evict key from the specified keyslot in the hardware. +- * The key is provided so that e.g. dm layers can evict +- * keys from the devices that they map over. +- * Returns 0 on success, -errno otherwise. +- * +- * This structure should be provided by storage device drivers when they set up +- * a keyslot manager - this structure holds the function ptrs that the keyslot +- * manager will use to manipulate keyslots in the hardware. +- */ +-struct blk_ksm_ll_ops { +- int (*keyslot_program)(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key, +- unsigned int slot); +- int (*keyslot_evict)(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key, +- unsigned int slot); +-}; +- +-struct blk_keyslot_manager { +- /* +- * The struct blk_ksm_ll_ops that this keyslot manager will use +- * to perform operations like programming and evicting keys on the +- * device +- */ +- struct blk_ksm_ll_ops ksm_ll_ops; +- +- /* +- * The maximum number of bytes supported for specifying the data unit +- * number. +- */ +- unsigned int max_dun_bytes_supported; +- +- /* +- * Array of size BLK_ENCRYPTION_MODE_MAX of bitmasks that represents +- * whether a crypto mode and data unit size are supported. The i'th +- * bit of crypto_mode_supported[crypto_mode] is set iff a data unit +- * size of (1 << i) is supported. We only support data unit sizes +- * that are powers of 2. +- */ +- unsigned int crypto_modes_supported[BLK_ENCRYPTION_MODE_MAX]; +- +- /* Device for runtime power management (NULL if none) */ +- struct device *dev; +- +- /* Here onwards are *private* fields for internal keyslot manager use */ +- +- unsigned int num_slots; +- +- /* Protects programming and evicting keys from the device */ +- struct rw_semaphore lock; +- +- /* List of idle slots, with least recently used slot at front */ +- wait_queue_head_t idle_slots_wait_queue; +- struct list_head idle_slots; +- spinlock_t idle_slots_lock; +- +- /* +- * Hash table which maps struct *blk_crypto_key to keyslots, so that we +- * can find a key's keyslot in O(1) time rather than O(num_slots). +- * Protected by 'lock'. 
+- */ +- struct hlist_head *slot_hashtable; +- unsigned int log_slot_ht_size; +- +- /* Per-keyslot data */ +- struct blk_ksm_keyslot *slots; +-}; +- +-int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots); +- +-int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm, +- unsigned int num_slots); +- +-blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key, +- struct blk_ksm_keyslot **slot_ptr); +- +-unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot); +- +-void blk_ksm_put_slot(struct blk_ksm_keyslot *slot); +- +-bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_config *cfg); +- +-int blk_ksm_evict_key(struct blk_keyslot_manager *ksm, +- const struct blk_crypto_key *key); +- +-void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm); +- +-void blk_ksm_destroy(struct blk_keyslot_manager *ksm); +- +-void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent, +- const struct blk_keyslot_manager *child); +- +-void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm); +- +-bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset, +- struct blk_keyslot_manager *ksm_subset); +- +-void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm, +- struct blk_keyslot_manager *reference_ksm); +- +-#endif /* __LINUX_KEYSLOT_MANAGER_H */ +diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h +index 0c0c9a0fdf57..725b1de41767 100644 +--- a/include/linux/mmc/host.h ++++ b/include/linux/mmc/host.h +@@ -15,7 +15,7 @@ + #include + #include + #include +-#include ++#include + + struct mmc_ios { + unsigned int clock; /* clock rate */ +-- +2.35.3 + diff --git a/patches.suse/blk-crypto-update-inline-encryption-documentation.patch b/patches.suse/blk-crypto-update-inline-encryption-documentation.patch new file mode 100644 index 0000000..c720f45 --- /dev/null +++ b/patches.suse/blk-crypto-update-inline-encryption-documentation.patch @@ -0,0 +1,513 @@ +From: Eric Biggers +Date: Mon, 18 Oct 2021 11:04:53 -0700 +Subject: [PATCH] blk-crypto: update inline encryption documentation +Git-commit: 8e9f666a6e66d3f882c094646d35536d2759103a +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Rework most of inline-encryption.rst to be easier to follow, to correct +some information, to add some important details and remove some +unimportant details, and to take into account the renaming from +blk_keyslot_manager to blk_crypto_profile. + +Reviewed-by: Mike Snitzer +Reviewed-by: Martin K. Petersen +Signed-off-by: Eric Biggers +Link: https://lore.kernel.org/r/20211018180453.40441-5-ebiggers@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + Documentation/block/inline-encryption.rst | 451 ++++++++++++---------- + 1 file changed, 245 insertions(+), 206 deletions(-) + +diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst +index 7f9b40d6b416..71d1044617a9 100644 +--- a/Documentation/block/inline-encryption.rst ++++ b/Documentation/block/inline-encryption.rst +@@ -7,230 +7,269 @@ Inline Encryption + Background + ========== + +-Inline encryption hardware sits logically between memory and the disk, and can +-en/decrypt data as it goes in/out of the disk. Inline encryption hardware has a +-fixed number of "keyslots" - slots into which encryption contexts (i.e. the +-encryption key, encryption algorithm, data unit size) can be programmed by the +-kernel at any time. 
Each request sent to the disk can be tagged with the index +-of a keyslot (and also a data unit number to act as an encryption tweak), and +-the inline encryption hardware will en/decrypt the data in the request with the +-encryption context programmed into that keyslot. This is very different from +-full disk encryption solutions like self encrypting drives/TCG OPAL/ATA +-Security standards, since with inline encryption, any block on disk could be +-encrypted with any encryption context the kernel chooses. +- ++Inline encryption hardware sits logically between memory and disk, and can ++en/decrypt data as it goes in/out of the disk. For each I/O request, software ++can control exactly how the inline encryption hardware will en/decrypt the data ++in terms of key, algorithm, data unit size (the granularity of en/decryption), ++and data unit number (a value that determines the initialization vector(s)). ++ ++Some inline encryption hardware accepts all encryption parameters including raw ++keys directly in low-level I/O requests. However, most inline encryption ++hardware instead has a fixed number of "keyslots" and requires that the key, ++algorithm, and data unit size first be programmed into a keyslot. Each ++low-level I/O request then just contains a keyslot index and data unit number. ++ ++Note that inline encryption hardware is very different from traditional crypto ++accelerators, which are supported through the kernel crypto API. Traditional ++crypto accelerators operate on memory regions, whereas inline encryption ++hardware operates on I/O requests. Thus, inline encryption hardware needs to be ++managed by the block layer, not the kernel crypto API. ++ ++Inline encryption hardware is also very different from "self-encrypting drives", ++such as those based on the TCG Opal or ATA Security standards. Self-encrypting ++drives don't provide fine-grained control of encryption and provide no way to ++verify the correctness of the resulting ciphertext. Inline encryption hardware ++provides fine-grained control of encryption, including the choice of key and ++initialization vector for each sector, and can be tested for correctness. + + Objective + ========= + +-We want to support inline encryption (IE) in the kernel. +-To allow for testing, we also want a crypto API fallback when actual +-IE hardware is absent. We also want IE to work with layered devices +-like dm and loopback (i.e. we want to be able to use the IE hardware +-of the underlying devices if present, or else fall back to crypto API +-en/decryption). +- ++We want to support inline encryption in the kernel. To make testing easier, we ++also want support for falling back to the kernel crypto API when actual inline ++encryption hardware is absent. We also want inline encryption to work with ++layered devices like device-mapper and loopback (i.e. we want to be able to use ++the inline encryption hardware of the underlying devices if present, or else ++fall back to crypto API en/decryption). + + Constraints and notes + ===================== + +-- IE hardware has a limited number of "keyslots" that can be programmed +- with an encryption context (key, algorithm, data unit size, etc.) at any time. +- One can specify a keyslot in a data request made to the device, and the +- device will en/decrypt the data using the encryption context programmed into +- that specified keyslot. When possible, we want to make multiple requests with +- the same encryption context share the same keyslot. 
+- +-- We need a way for upper layers like filesystems to specify an encryption +- context to use for en/decrypting a struct bio, and a device driver (like UFS) +- needs to be able to use that encryption context when it processes the bio. +- +-- We need a way for device drivers to expose their inline encryption +- capabilities in a unified way to the upper layers. +- +- +-Design +-====== +- +-We add a struct bio_crypt_ctx to struct bio that can +-represent an encryption context, because we need to be able to pass this +-encryption context from the upper layers (like the fs layer) to the +-device driver to act upon. +- +-While IE hardware works on the notion of keyslots, the FS layer has no +-knowledge of keyslots - it simply wants to specify an encryption context to +-use while en/decrypting a bio. +- +-We introduce a keyslot manager (KSM) that handles the translation from +-encryption contexts specified by the FS to keyslots on the IE hardware. +-This KSM also serves as the way IE hardware can expose its capabilities to +-upper layers. The generic mode of operation is: each device driver that wants +-to support IE will construct a KSM and set it up in its struct request_queue. +-Upper layers that want to use IE on this device can then use this KSM in +-the device's struct request_queue to translate an encryption context into +-a keyslot. The presence of the KSM in the request queue shall be used to mean +-that the device supports IE. +- +-The KSM uses refcounts to track which keyslots are idle (either they have no +-encryption context programmed, or there are no in-flight struct bios +-referencing that keyslot). When a new encryption context needs a keyslot, it +-tries to find a keyslot that has already been programmed with the same +-encryption context, and if there is no such keyslot, it evicts the least +-recently used idle keyslot and programs the new encryption context into that +-one. If no idle keyslots are available, then the caller will sleep until there +-is at least one. +- +- +-blk-mq changes, other block layer changes and blk-crypto-fallback +-================================================================= +- +-We add a pointer to a ``bi_crypt_context`` and ``keyslot`` to +-struct request. These will be referred to as the ``crypto fields`` +-for the request. This ``keyslot`` is the keyslot into which the +-``bi_crypt_context`` has been programmed in the KSM of the ``request_queue`` +-that this request is being sent to. +- +-We introduce ``block/blk-crypto-fallback.c``, which allows upper layers to remain +-blissfully unaware of whether or not real inline encryption hardware is present +-underneath. When a bio is submitted with a target ``request_queue`` that doesn't +-support the encryption context specified with the bio, the block layer will +-en/decrypt the bio with the blk-crypto-fallback. +- +-If the bio is a ``WRITE`` bio, a bounce bio is allocated, and the data in the bio +-is encrypted stored in the bounce bio - blk-mq will then proceed to process the +-bounce bio as if it were not encrypted at all (except when blk-integrity is +-concerned). ``blk-crypto-fallback`` sets the bounce bio's ``bi_end_io`` to an +-internal function that cleans up the bounce bio and ends the original bio. +- +-If the bio is a ``READ`` bio, the bio's ``bi_end_io`` (and also ``bi_private``) +-is saved and overwritten by ``blk-crypto-fallback`` to +-``bio_crypto_fallback_decrypt_bio``. 
The bio's ``bi_crypt_context`` is also +-overwritten with ``NULL``, so that to the rest of the stack, the bio looks +-as if it was a regular bio that never had an encryption context specified. +-``bio_crypto_fallback_decrypt_bio`` will decrypt the bio, restore the original +-``bi_end_io`` (and also ``bi_private``) and end the bio again. +- +-Regardless of whether real inline encryption hardware is used or the ++- We need a way for upper layers (e.g. filesystems) to specify an encryption ++ context to use for en/decrypting a bio, and device drivers (e.g. UFSHCD) need ++ to be able to use that encryption context when they process the request. ++ Encryption contexts also introduce constraints on bio merging; the block layer ++ needs to be aware of these constraints. ++ ++- Different inline encryption hardware has different supported algorithms, ++ supported data unit sizes, maximum data unit numbers, etc. We call these ++ properties the "crypto capabilities". We need a way for device drivers to ++ advertise crypto capabilities to upper layers in a generic way. ++ ++- Inline encryption hardware usually (but not always) requires that keys be ++ programmed into keyslots before being used. Since programming keyslots may be ++ slow and there may not be very many keyslots, we shouldn't just program the ++ key for every I/O request, but rather keep track of which keys are in the ++ keyslots and reuse an already-programmed keyslot when possible. ++ ++- Upper layers typically define a specific end-of-life for crypto keys, e.g. ++ when an encrypted directory is locked or when a crypto mapping is torn down. ++ At these times, keys are wiped from memory. We must provide a way for upper ++ layers to also evict keys from any keyslots they are present in. ++ ++- When possible, device-mapper devices must be able to pass through the inline ++ encryption support of their underlying devices. However, it doesn't make ++ sense for device-mapper devices to have keyslots themselves. ++ ++Basic design ++============ ++ ++We introduce ``struct blk_crypto_key`` to represent an inline encryption key and ++how it will be used. This includes the actual bytes of the key; the size of the ++key; the algorithm and data unit size the key will be used with; and the number ++of bytes needed to represent the maximum data unit number the key will be used ++with. ++ ++We introduce ``struct bio_crypt_ctx`` to represent an encryption context. It ++contains a data unit number and a pointer to a blk_crypto_key. We add pointers ++to a bio_crypt_ctx to ``struct bio`` and ``struct request``; this allows users ++of the block layer (e.g. filesystems) to provide an encryption context when ++creating a bio and have it be passed down the stack for processing by the block ++layer and device drivers. Note that the encryption context doesn't explicitly ++say whether to encrypt or decrypt, as that is implicit from the direction of the ++bio; WRITE means encrypt, and READ means decrypt. ++ ++We also introduce ``struct blk_crypto_profile`` to contain all generic inline ++encryption-related state for a particular inline encryption device. The ++blk_crypto_profile serves as the way that drivers for inline encryption hardware ++advertise their crypto capabilities and provide certain functions (e.g., ++functions to program and evict keys) to upper layers. Each device driver that ++wants to support inline encryption will construct a blk_crypto_profile, then ++associate it with the disk's request_queue. 
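For illustration, a minimal sketch of how a driver might do this; it is not
code from this patch. The my_* names and MY_NUM_KEYSLOTS are invented,
my_crypto_ll_ops is sketched under the device-driver section further below,
error handling is trimmed, and blk_crypto_register() is assumed from the
v5.16 profile API::

	static int my_init_crypto(struct my_host *host, struct request_queue *q)
	{
		struct blk_crypto_profile *profile = &host->crypto_profile;
		int err;

		/* Size the profile to the hardware's keyslot count. */
		err = devm_blk_crypto_profile_init(host->dev, profile,
						   MY_NUM_KEYSLOTS);
		if (err)
			return err;

		profile->ll_ops = my_crypto_ll_ops;
		profile->max_dun_bytes_supported = 8;
		/* Bit (1 << i) advertises a supported data unit size of 2^i. */
		profile->modes_supported[BLK_ENCRYPTION_MODE_AES_256_XTS] |= 4096;

		/* Associate the profile with the disk's request_queue. */
		blk_crypto_register(profile, q);
		return 0;
	}
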
++ ++The blk_crypto_profile also manages the hardware's keyslots, when applicable. ++This happens in the block layer, so that users of the block layer can just ++specify encryption contexts and don't need to know about keyslots at all, nor do ++device drivers need to care about most details of keyslot management. ++ ++Specifically, for each keyslot, the block layer (via the blk_crypto_profile) ++keeps track of which blk_crypto_key that keyslot contains (if any), and how many ++in-flight I/O requests are using it. When the block layer creates a ++``struct request`` for a bio that has an encryption context, it grabs a keyslot ++that already contains the key if possible. Otherwise it waits for an idle ++keyslot (a keyslot that isn't in-use by any I/O), then programs the key into the ++least-recently-used idle keyslot using the function the device driver provided. ++In both cases, the resulting keyslot is stored in the ``crypt_keyslot`` field of ++the request, where it is then accessible to device drivers and is released after ++the request completes. ++ ++``struct request`` also contains a pointer to the original bio_crypt_ctx. ++Requests can be built from multiple bios, and the block layer must take the ++encryption context into account when trying to merge bios and requests. For two ++bios/requests to be merged, they must have compatible encryption contexts: both ++unencrypted, or both encrypted with the same key and contiguous data unit ++numbers. Only the encryption context for the first bio in a request is ++retained, since the remaining bios have been verified to be merge-compatible ++with the first bio. ++ ++To make it possible for inline encryption to work with request_queue based ++layered devices, when a request is cloned, its encryption context is cloned as ++well. When the cloned request is submitted, it is then processed as usual; this ++includes getting a keyslot from the clone's target device if needed. ++ ++blk-crypto-fallback ++=================== ++ ++It is desirable for the inline encryption support of upper layers (e.g. ++filesystems) to be testable without real inline encryption hardware, and ++likewise for the block layer's keyslot management logic. It is also desirable ++to allow upper layers to just always use inline encryption rather than have to ++implement encryption in multiple ways. ++ ++Therefore, we also introduce *blk-crypto-fallback*, which is an implementation ++of inline encryption using the kernel crypto API. blk-crypto-fallback is built ++into the block layer, so it works on any block device without any special setup. ++Essentially, when a bio with an encryption context is submitted to a ++request_queue that doesn't support that encryption context, the block layer will ++handle en/decryption of the bio using blk-crypto-fallback. ++ ++For encryption, the data cannot be encrypted in-place, as callers usually rely ++on it being unmodified. Instead, blk-crypto-fallback allocates bounce pages, ++fills a new bio with those bounce pages, encrypts the data into those bounce ++pages, and submits that "bounce" bio. When the bounce bio completes, ++blk-crypto-fallback completes the original bio. If the original bio is too ++large, multiple bounce bios may be required; see the code for details. ++ ++For decryption, blk-crypto-fallback "wraps" the bio's completion callback ++(``bi_complete``) and private data (``bi_private``) with its own, unsets the ++bio's encryption context, then submits the bio. 
If the read completes ++successfully, blk-crypto-fallback restores the bio's original completion ++callback and private data, then decrypts the bio's data in-place using the ++kernel crypto API. Decryption happens from a workqueue, as it may sleep. ++Afterwards, blk-crypto-fallback completes the bio. ++ ++In both cases, the bios that blk-crypto-fallback submits no longer have an ++encryption context. Therefore, lower layers only see standard unencrypted I/O. ++ ++blk-crypto-fallback also defines its own blk_crypto_profile and has its own ++"keyslots"; its keyslots contain ``struct crypto_skcipher`` objects. The reason ++for this is twofold. First, it allows the keyslot management logic to be tested ++without actual inline encryption hardware. Second, similar to actual inline ++encryption hardware, the crypto API doesn't accept keys directly in requests but ++rather requires that keys be set ahead of time, and setting keys can be ++expensive; moreover, allocating a crypto_skcipher can't happen on the I/O path ++at all due to the locks it takes. Therefore, the concept of keyslots still ++makes sense for blk-crypto-fallback. ++ ++Note that regardless of whether real inline encryption hardware or + blk-crypto-fallback is used, the ciphertext written to disk (and hence the +-on-disk format of data) will be the same (assuming the hardware's implementation +-of the algorithm being used adheres to spec and functions correctly). +- +-If a ``request queue``'s inline encryption hardware claimed to support the +-encryption context specified with a bio, then it will not be handled by the +-``blk-crypto-fallback``. We will eventually reach a point in blk-mq when a +-struct request needs to be allocated for that bio. At that point, +-blk-mq tries to program the encryption context into the ``request_queue``'s +-keyslot_manager, and obtain a keyslot, which it stores in its newly added +-``keyslot`` field. This keyslot is released when the request is completed. +- +-When the first bio is added to a request, ``blk_crypto_rq_bio_prep`` is called, +-which sets the request's ``crypt_ctx`` to a copy of the bio's +-``bi_crypt_context``. bio_crypt_do_front_merge is called whenever a subsequent +-bio is merged to the front of the request, which updates the ``crypt_ctx`` of +-the request so that it matches the newly merged bio's ``bi_crypt_context``. In particular, the request keeps a copy of the ``bi_crypt_context`` of the first +-bio in its bio-list (blk-mq needs to be careful to maintain this invariant +-during bio and request merges). +- +-To make it possible for inline encryption to work with request queue based +-layered devices, when a request is cloned, its ``crypto fields`` are cloned as +-well. When the cloned request is submitted, blk-mq programs the +-``bi_crypt_context`` of the request into the clone's request_queue's keyslot +-manager, and stores the returned keyslot in the clone's ``keyslot``. ++on-disk format of data) will be the same (assuming that both the inline ++encryption hardware's implementation and the kernel crypto API's implementation ++of the algorithm being used adhere to spec and function correctly). + ++blk-crypto-fallback is optional and is controlled by the ++``CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK`` kernel configuration option. 
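As a point of reference for the fallback keyslots described above, the
per-slot state can be pictured roughly as follows; this is a simplified
sketch of the idea, not the literal definition in blk-crypto-fallback.c::

	/*
	 * Each blk-crypto-fallback "keyslot" caches one pre-keyed skcipher
	 * per crypto mode, so the I/O path never allocates transforms or
	 * sets keys.
	 */
	struct fallback_keyslot {
		enum blk_crypto_mode_num crypto_mode;
		struct crypto_skcipher *tfms[BLK_ENCRYPTION_MODE_MAX];
	};
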
+ + API presented to users of the block layer + ========================================= + +-``struct blk_crypto_key`` represents a crypto key (the raw key, size of the +-key, the crypto algorithm to use, the data unit size to use, and the number of +-bytes required to represent data unit numbers that will be specified with the +-``bi_crypt_context``). +- +-``blk_crypto_init_key`` allows upper layers to initialize such a +-``blk_crypto_key``. +- +-``bio_crypt_set_ctx`` should be called on any bio that a user of +-the block layer wants en/decrypted via inline encryption (or the +-blk-crypto-fallback, if hardware support isn't available for the desired +-crypto configuration). This function takes the ``blk_crypto_key`` and the +-data unit number (DUN) to use when en/decrypting the bio. +- +-``blk_crypto_config_supported`` allows upper layers to query whether or not the +-an encryption context passed to request queue can be handled by blk-crypto +-(either by real inline encryption hardware, or by the blk-crypto-fallback). +-This is useful e.g. when blk-crypto-fallback is disabled, and the upper layer +-wants to use an algorithm that may not supported by hardware - this function +-lets the upper layer know ahead of time that the algorithm isn't supported, +-and the upper layer can fallback to something else if appropriate. +- +-``blk_crypto_start_using_key`` - Upper layers must call this function on +-``blk_crypto_key`` and a ``request_queue`` before using the key with any bio +-headed for that ``request_queue``. This function ensures that either the +-hardware supports the key's crypto settings, or the crypto API fallback has +-transforms for the needed mode allocated and ready to go. Note that this +-function may allocate an ``skcipher``, and must not be called from the data +-path, since allocating ``skciphers`` from the data path can deadlock. +- +-``blk_crypto_evict_key`` *must* be called by upper layers before a +-``blk_crypto_key`` is freed. Further, it *must* only be called only once +-there are no more in-flight requests that use that ``blk_crypto_key``. +-``blk_crypto_evict_key`` will ensure that a key is removed from any keyslots in +-inline encryption hardware that the key might have been programmed into (or the blk-crypto-fallback). ++``blk_crypto_config_supported()`` allows users to check ahead of time whether ++inline encryption with particular crypto settings will work on a particular ++request_queue -- either via hardware or via blk-crypto-fallback. This function ++takes in a ``struct blk_crypto_config`` which is like blk_crypto_key, but omits ++the actual bytes of the key and instead just contains the algorithm, data unit ++size, etc. This function can be useful if blk-crypto-fallback is disabled. ++ ++``blk_crypto_init_key()`` allows users to initialize a blk_crypto_key. ++ ++Users must call ``blk_crypto_start_using_key()`` before actually starting to use ++a blk_crypto_key on a request_queue (even if ``blk_crypto_config_supported()`` ++was called earlier). This is needed to initialize blk-crypto-fallback if it ++will be needed. This must not be called from the data path, as this may have to ++allocate resources, which may deadlock in that case. ++ ++Next, to attach an encryption context to a bio, users should call ++``bio_crypt_set_ctx()``. This function allocates a bio_crypt_ctx and attaches ++it to a bio, given the blk_crypto_key and the data unit number that will be used ++for en/decryption. 
Users don't need to worry about freeing the bio_crypt_ctx ++later, as that happens automatically when the bio is freed or reset. ++ ++Finally, when done using inline encryption with a blk_crypto_key on a ++request_queue, users must call ``blk_crypto_evict_key()``. This ensures that ++the key is evicted from all keyslots it may be programmed into and unlinked from ++any kernel data structures it may be linked into. ++ ++In summary, for users of the block layer, the lifecycle of a blk_crypto_key is ++as follows: ++ ++1. ``blk_crypto_config_supported()`` (optional) ++2. ``blk_crypto_init_key()`` ++3. ``blk_crypto_start_using_key()`` ++4. ``bio_crypt_set_ctx()`` (potentially many times) ++5. ``blk_crypto_evict_key()`` (after all I/O has completed) ++6. Zeroize the blk_crypto_key (this has no dedicated function) ++ ++If a blk_crypto_key is being used on multiple request_queues, then ++``blk_crypto_config_supported()`` (if used), ``blk_crypto_start_using_key()``, ++and ``blk_crypto_evict_key()`` must be called on each request_queue. + + API presented to device drivers + =============================== + +-A :c:type:``struct blk_keyslot_manager`` should be set up by device drivers in +-the ``request_queue`` of the device. The device driver needs to call +-``blk_ksm_init`` (or its resource-managed variant ``devm_blk_ksm_init``) on the +-``blk_keyslot_manager``, while specifying the number of keyslots supported by +-the hardware. +- +-The device driver also needs to tell the KSM how to actually manipulate the +-IE hardware in the device to do things like programming the crypto key into +-the IE hardware into a particular keyslot. All this is achieved through the +-struct blk_ksm_ll_ops field in the KSM that the device driver +-must fill up after initing the ``blk_keyslot_manager``. +- +-The KSM also handles runtime power management for the device when applicable +-(e.g. when it wants to program a crypto key into the IE hardware, the device +-must be runtime powered on) - so the device driver must also set the ``dev`` +-field in the ksm to point to the `struct device` for the KSM to use for runtime +-power management. +- +-``blk_ksm_reprogram_all_keys`` can be called by device drivers if the device +-needs each and every of its keyslots to be reprogrammed with the key it +-"should have" at the point in time when the function is called. This is useful +-e.g. if a device loses all its keys on runtime power down/up. +- +-If the driver used ``blk_ksm_init`` instead of ``devm_blk_ksm_init``, then +-``blk_ksm_destroy`` should be called to free up all resources used by a +-``blk_keyslot_manager`` once it is no longer needed. ++A device driver that wants to support inline encryption must set up a ++blk_crypto_profile in the request_queue of its device. To do this, it first ++must call ``blk_crypto_profile_init()`` (or its resource-managed variant ++``devm_blk_crypto_profile_init()``), providing the number of keyslots. ++ ++Next, it must advertise its crypto capabilities by setting fields in the ++blk_crypto_profile, e.g. ``modes_supported`` and ``max_dun_bytes_supported``. ++ ++It then must set function pointers in the ``ll_ops`` field of the ++blk_crypto_profile to tell upper layers how to control the inline encryption ++hardware, e.g. how to program and evict keyslots. Most drivers will need to ++implement ``keyslot_program`` and ``keyslot_evict``. For details, see the ++comments for ``struct blk_crypto_ll_ops``. 
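A sketch of what such low-level operations tend to look like; the my_*
helpers, the container layout, and the register-programming details are
invented for illustration::

	static int my_keyslot_program(struct blk_crypto_profile *profile,
				      const struct blk_crypto_key *key,
				      unsigned int slot)
	{
		struct my_host *host = container_of(profile, struct my_host,
						    crypto_profile);

		/* Write the raw key bytes into the hardware key table. */
		my_write_key_regs(host, slot, key->raw, key->size);
		return 0;
	}

	static int my_keyslot_evict(struct blk_crypto_profile *profile,
				    const struct blk_crypto_key *key,
				    unsigned int slot)
	{
		struct my_host *host = container_of(profile, struct my_host,
						    crypto_profile);

		/* Zero the slot so the key no longer resides in hardware. */
		my_clear_key_regs(host, slot);
		return 0;
	}

	static const struct blk_crypto_ll_ops my_crypto_ll_ops = {
		.keyslot_program	= my_keyslot_program,
		.keyslot_evict		= my_keyslot_evict,
	};
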
++ ++Once the driver registers a blk_crypto_profile with a request_queue, I/O ++requests the driver receives via that queue may have an encryption context. All ++encryption contexts will be compatible with the crypto capabilities declared in ++the blk_crypto_profile, so drivers don't need to worry about handling ++unsupported requests. Also, if a nonzero number of keyslots was declared in the ++blk_crypto_profile, then all I/O requests that have an encryption context will ++also have a keyslot which was already programmed with the appropriate key. ++ ++If the driver implements runtime suspend and its blk_crypto_ll_ops don't work ++while the device is runtime-suspended, then the driver must also set the ``dev`` ++field of the blk_crypto_profile to point to the ``struct device`` that will be ++resumed before any of the low-level operations are called. ++ ++If there are situations where the inline encryption hardware loses the contents ++of its keyslots, e.g. device resets, the driver must handle reprogramming the ++keyslots. To do this, the driver may call ``blk_crypto_reprogram_all_keys()``. ++ ++Finally, if the driver used ``blk_crypto_profile_init()`` instead of ++``devm_blk_crypto_profile_init()``, then it is responsible for calling ++``blk_crypto_profile_destroy()`` when the crypto profile is no longer needed. + + Layered Devices + =============== + +-Request queue based layered devices like dm-rq that wish to support IE need to +-create their own keyslot manager for their request queue, and expose whatever +-functionality they choose. When a layered device wants to pass a clone of that +-request to another ``request_queue``, blk-crypto will initialize and prepare the +-clone as necessary - see ``blk_crypto_insert_cloned_request`` in +-``blk-crypto.c``. +- +- +-Future Optimizations for layered devices +-======================================== +- +-Creating a keyslot manager for a layered device uses up memory for each +-keyslot, and in general, a layered device merely passes the request on to a +-"child" device, so the keyslots in the layered device itself are completely +-unused, and don't need any refcounting or keyslot programming. We can instead +-define a new type of KSM; the "passthrough KSM", that layered devices can use +-to advertise an unlimited number of keyslots, and support for any encryption +-algorithms they choose, while not actually using any memory for each keyslot. +-Another use case for the "passthrough KSM" is for IE devices that do not have a +-limited number of keyslots. +- ++Request queue based layered devices like dm-rq that wish to support inline ++encryption need to create their own blk_crypto_profile for their request_queue, ++and expose whatever functionality they choose. When a layered device wants to ++pass a clone of that request to another request_queue, blk-crypto will ++initialize and prepare the clone as necessary; see ++``blk_crypto_insert_cloned_request()``. + + Interaction between inline encryption and blk integrity + ======================================================= +@@ -257,7 +296,7 @@ Because there isn't any real hardware yet, it seems prudent to assume that + hardware implementations might not implement both features together correctly, + and disallow the combination for now. Whenever a device supports integrity, the + kernel will pretend that the device does not support hardware inline encryption +-(by essentially setting the keyslot manager in the request_queue of the device +-to NULL). 
When the crypto API fallback is enabled, this means that all bios with +-and encryption context will use the fallback, and IO will complete as usual. +-When the fallback is disabled, a bio with an encryption context will be failed. ++(by setting the blk_crypto_profile in the request_queue of the device to NULL). ++When the crypto API fallback is enabled, this means that all bios with and ++encryption context will use the fallback, and IO will complete as usual. When ++the fallback is disabled, a bio with an encryption context will be failed. +-- +2.35.3 + diff --git a/patches.suse/blk-mq-Fix-blk_mq_tagset_busy_iter-for-shared-tags.patch b/patches.suse/blk-mq-Fix-blk_mq_tagset_busy_iter-for-shared-tags.patch new file mode 100644 index 0000000..2f31453 --- /dev/null +++ b/patches.suse/blk-mq-Fix-blk_mq_tagset_busy_iter-for-shared-tags.patch @@ -0,0 +1,45 @@ +From: John Garry +Date: Mon, 18 Oct 2021 17:41:23 +0800 +Subject: [PATCH] blk-mq: Fix blk_mq_tagset_busy_iter() for shared tags +Git-commit: 0994c64eb4159ba019e7fedc7ba0dd6a69235b40 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Since it is now possible for a tagset to share a single set of tags, the +iter function should not re-iter the tags for the count of #hw queues in +that case. Rather it should just iter once. + +Fixes: e155b0c238b2 ("blk-mq: Use shared tags for shared sbitmap support") +Reported-by: Kashyap Desai +Signed-off-by: John Garry +Reviewed-by: Ming Lei +Tested-by: Kashyap Desai +Link: https://lore.kernel.org/r/1634550083-202815-1-git-send-email-john.garry@huawei.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq-tag.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c +index b94c3e8ef392..995336abee33 100644 +--- a/block/blk-mq-tag.c ++++ b/block/blk-mq-tag.c +@@ -399,9 +399,12 @@ void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, + void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, + busy_tag_iter_fn *fn, void *priv) + { +- int i; ++ unsigned int flags = tagset->flags; ++ int i, nr_tags; ++ ++ nr_tags = blk_mq_is_shared_tags(flags) ? 1 : tagset->nr_hw_queues; + +- for (i = 0; i < tagset->nr_hw_queues; i++) { ++ for (i = 0; i < nr_tags; i++) { + if (tagset->tags && tagset->tags[i]) + __blk_mq_all_tag_iter(tagset->tags[i], fn, priv, + BT_TAG_ITER_STARTED); +-- +2.35.3 + diff --git a/patches.suse/blk-mq-debugfs-Show-active-requests-per-queue-for-sh.patch b/patches.suse/blk-mq-debugfs-Show-active-requests-per-queue-for-sh.patch new file mode 100644 index 0000000..43af36b --- /dev/null +++ b/patches.suse/blk-mq-debugfs-Show-active-requests-per-queue-for-sh.patch @@ -0,0 +1,42 @@ +From: John Garry +Date: Fri, 29 Oct 2021 16:40:23 +0800 +Subject: [PATCH] blk-mq-debugfs: Show active requests per queue for shared + tags +Git-commit: 9b84c629c90374498ab5825dede74a06ea1c775b +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Currently we show the hctx.active value for the per-hctx "active" file. + +However this is not maintained for shared tags, and we instead keep a +record of the number active requests per request queue - see commit +f1b49fdc1c64 ("blk-mq: Record active_queues_shared_sbitmap per tag_set for +when using shared sbitmap). + +Change for the case of shared tags to show the active requests per request +queue by using __blk_mq_active_requests() helper. 
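
For context, the helper being switched to reads approximately as follows in
v5.16 (paraphrased, not quoted from this patch; see block/blk-mq.h in the
tree for the authoritative version):

	static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx)
	{
		if (blk_mq_is_shared_tags(hctx->flags))
			return atomic_read(
				&hctx->queue->nr_active_requests_shared_tags);
		return atomic_read(&hctx->nr_active);
	}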
+ +Signed-off-by: John Garry +Link: https://lore.kernel.org/r/1635496823-33515-1-git-send-email-john.garry@huawei.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq-debugfs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c +index 68ca5d21cda7..0f8c60e9c719 100644 +--- a/block/blk-mq-debugfs.c ++++ b/block/blk-mq-debugfs.c +@@ -550,7 +550,7 @@ static int hctx_active_show(void *data, struct seq_file *m) + { + struct blk_mq_hw_ctx *hctx = data; + +- seq_printf(m, "%d\n", atomic_read(&hctx->nr_active)); ++ seq_printf(m, "%d\n", __blk_mq_active_requests(hctx)); + return 0; + } + +-- +2.35.3 + diff --git a/patches.suse/blk-mq-don-t-handle-non-flush-requests-in-blk_insert.patch b/patches.suse/blk-mq-don-t-handle-non-flush-requests-in-blk_insert.patch new file mode 100644 index 0000000..a5ba507 --- /dev/null +++ b/patches.suse/blk-mq-don-t-handle-non-flush-requests-in-blk_insert.patch @@ -0,0 +1,112 @@ +From: Christoph Hellwig +Date: Tue, 19 Oct 2021 14:25:53 +0200 +Subject: [PATCH] blk-mq: don't handle non-flush requests in blk_insert_flush +Git-commit: d92ca9d8348fb12c89eac5928bd651c3a485d7b9 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Return to the normal blk_mq_submit_bio flow if the bio did not end up +actually being a flush because the device didn't support it. Note that +this is basically impossible to hit without special instrumentation given +that submit_bio_checks already clears these flags usually, so we'd need a +tight race to actually hit this code path. + +With this the call to blk_mq_run_hw_queue for the flush requests can be +removed given that the actual flush requests are always issued via the +requeue workqueue which runs the queue unconditionally. + +Signed-off-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20211019122553.2467817-1-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-flush.c | 12 ++++++------ + block/blk-mq.c | 14 ++++++-------- + block/blk.h | 2 +- + 3 files changed, 13 insertions(+), 15 deletions(-) + +diff --git a/block/blk-flush.c b/block/blk-flush.c +index 4201728bf3a5..8e364bda5166 100644 +--- a/block/blk-flush.c ++++ b/block/blk-flush.c +@@ -379,7 +379,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) + * @rq is being submitted. Analyze what needs to be done and put it on the + * right queue. + */ +-void blk_insert_flush(struct request *rq) ++bool blk_insert_flush(struct request *rq) + { + struct request_queue *q = rq->q; + unsigned long fflags = q->queue_flags; /* may change, cache */ +@@ -409,7 +409,7 @@ void blk_insert_flush(struct request *rq) + */ + if (!policy) { + blk_mq_end_request(rq, 0); +- return; ++ return true; + } + + BUG_ON(rq->bio != rq->biotail); /*assumes zero or single bio rq */ +@@ -420,10 +420,8 @@ void blk_insert_flush(struct request *rq) + * for normal execution. + */ + if ((policy & REQ_FSEQ_DATA) && +- !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { +- blk_mq_request_bypass_insert(rq, false, false); +- return; +- } ++ !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) ++ return false; + + /* + * @rq should go through flush machinery. 
Mark it part of flush +@@ -439,6 +437,8 @@ void blk_insert_flush(struct request *rq) + spin_lock_irq(&fq->mq_flush_lock); + blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0); + spin_unlock_irq(&fq->mq_flush_lock); ++ ++ return true; + } + + /** +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 71ab7521dd3d..3481a8712234 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2532,14 +2532,12 @@ void blk_mq_submit_bio(struct bio *bio) + return; + } + +- if (unlikely(is_flush_fua)) { +- struct blk_mq_hw_ctx *hctx = rq->mq_hctx; +- /* Bypass scheduler for flush requests */ +- blk_insert_flush(rq); +- blk_mq_run_hw_queue(hctx, true); +- } else if (plug && (q->nr_hw_queues == 1 || +- blk_mq_is_shared_tags(rq->mq_hctx->flags) || +- q->mq_ops->commit_rqs || !blk_queue_nonrot(q))) { ++ if (is_flush_fua && blk_insert_flush(rq)) ++ return; ++ ++ if (plug && (q->nr_hw_queues == 1 || ++ blk_mq_is_shared_tags(rq->mq_hctx->flags) || ++ q->mq_ops->commit_rqs || !blk_queue_nonrot(q))) { + /* + * Use plugging if we have a ->commit_rqs() hook as well, as + * we know the driver uses bd->last in a smart fashion. +diff --git a/block/blk.h b/block/blk.h +index b9729c12fd62..6a039e6c7d07 100644 +--- a/block/blk.h ++++ b/block/blk.h +@@ -236,7 +236,7 @@ void __blk_account_io_done(struct request *req, u64 now); + */ + #define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED) + +-void blk_insert_flush(struct request *rq); ++bool blk_insert_flush(struct request *rq); + + int elevator_switch_mq(struct request_queue *q, + struct elevator_type *new_e); +-- +2.35.3 + diff --git a/patches.suse/blk-mq-don-t-issue-request-directly-in-case-that-cur.patch b/patches.suse/blk-mq-don-t-issue-request-directly-in-case-that-cur.patch new file mode 100644 index 0000000..755318d --- /dev/null +++ b/patches.suse/blk-mq-don-t-issue-request-directly-in-case-that-cur.patch @@ -0,0 +1,37 @@ +From: Ming Lei +Date: Tue, 26 Oct 2021 16:22:57 +0800 +Subject: [PATCH] blk-mq: don't issue request directly in case that current is + to be blocked +Git-commit: ff1552232b3612edff43a95746a4e78e231ef3d4 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +When flushing plug list in case that current will be blocked, we can't +issue request directly because ->queue_rq() may sleep, otherwise scheduler +may complain. 
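
For context, and not part of the change itself: ->queue_rq() is allowed to
sleep when the driver sets BLK_MQ_F_BLOCKING. A hypothetical tag-set setup
for such a driver (the my_* names are invented) looks like:

	static int my_init_tag_set(struct my_host *host)
	{
		struct blk_mq_tag_set *set = &host->tag_set;

		set->ops = &my_mq_ops;	/* my_queue_rq() may block */
		set->nr_hw_queues = 1;
		set->queue_depth = 64;
		/* Tells blk-mq that ->queue_rq() may sleep. */
		set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
		return blk_mq_alloc_tag_set(set);
	}

Issuing such a request directly from a context that is about to block would
mean sleeping inside the scheduler path, hence the !from_schedule check
added below.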
+ +Fixes: dc5fc361d891 ("block: attempt direct issue of plug list") +Signed-off-by: Ming Lei +Link: https://lore.kernel.org/r/20211026082257.2889890-1-ming.lei@redhat.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index c19dfa8ea65e..9840b15f505b 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2223,7 +2223,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) + return; + plug->rq_count = 0; + +- if (!plug->multiple_queues && !plug->has_elevator) { ++ if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) { + blk_mq_plug_issue_direct(plug, from_schedule); + if (rq_list_empty(plug->mq_list)) + return; +-- +2.35.3 + diff --git a/patches.suse/blk-mq-factor-out-a-blk_qc_to_hctx-helper.patch b/patches.suse/blk-mq-factor-out-a-blk_qc_to_hctx-helper.patch new file mode 100644 index 0000000..f556280 --- /dev/null +++ b/patches.suse/blk-mq-factor-out-a-blk_qc_to_hctx-helper.patch @@ -0,0 +1,66 @@ +From: Christoph Hellwig +Date: Tue, 12 Oct 2021 13:12:15 +0200 +Subject: [PATCH] blk-mq: factor out a blk_qc_to_hctx helper +Git-commit: f70299f0d58e0e21f7f5f5ab27e601e8d3f0373e +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Add a helper to get the hctx from a request_queue and cookie, and fold +the blk_qc_t_to_queue_num helper into it as no other callers are left. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Sagi Grimberg +Tested-by: Mark Wunderlich +Link: https://lore.kernel.org/r/20211012111226.760968-6-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 8 +++++++- + include/linux/blk_types.h | 5 ----- + 2 files changed, 7 insertions(+), 6 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 97b911866de1..35b2ab0a373a 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -65,6 +65,12 @@ static int blk_mq_poll_stats_bkt(const struct request *rq) + return bucket; + } + ++static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q, ++ blk_qc_t qc) ++{ ++ return q->queue_hw_ctx[(qc & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT]; ++} ++ + /* + * Check if any of the ctx, dispatch list or elevator + * have pending work in this hardware queue. 
+@@ -4071,7 +4077,7 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) + if (current->plug) + blk_flush_plug_list(current->plug, false); + +- hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; ++ hctx = blk_qc_to_hctx(q, cookie); + + /* + * If we sleep, have the caller restart the poll loop to reset +diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h +index 3b967053e9f5..000351c5312a 100644 +--- a/include/linux/blk_types.h ++++ b/include/linux/blk_types.h +@@ -505,11 +505,6 @@ static inline bool blk_qc_t_valid(blk_qc_t cookie) + return cookie != BLK_QC_T_NONE; + } + +-static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) +-{ +- return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT; +-} +- + static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) + { + return cookie & ((1u << BLK_QC_T_SHIFT) - 1); +-- +2.35.3 + diff --git a/patches.suse/blk-mq-factor-out-a-classic-poll-helper.patch b/patches.suse/blk-mq-factor-out-a-classic-poll-helper.patch new file mode 100644 index 0000000..367750d --- /dev/null +++ b/patches.suse/blk-mq-factor-out-a-classic-poll-helper.patch @@ -0,0 +1,196 @@ +From: Christoph Hellwig +Date: Tue, 12 Oct 2021 13:12:16 +0200 +Subject: [PATCH] blk-mq: factor out a "classic" poll helper +Git-commit: c6699d6fe0ffe4d9fdc652d1acf5a94b4f9627ba +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Factor the code to do the classic full metal polling out of blk_poll into +a separate blk_mq_poll_classic helper. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Sagi Grimberg +Tested-by: Mark Wunderlich +Link: https://lore.kernel.org/r/20211012111226.760968-7-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 120 +++++++++++++++++++++++-------------------------- + 1 file changed, 56 insertions(+), 64 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 35b2ab0a373a..636773056874 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -71,6 +71,14 @@ static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q, + return q->queue_hw_ctx[(qc & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT]; + } + ++static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx, ++ blk_qc_t qc) ++{ ++ if (blk_qc_t_is_internal(qc)) ++ return blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(qc)); ++ return blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(qc)); ++} ++ + /* + * Check if any of the ctx, dispatch list or elevator + * have pending work in this hardware queue. +@@ -3975,15 +3983,20 @@ static unsigned long blk_mq_poll_nsecs(struct request_queue *q, + return ret; + } + +-static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, +- struct request *rq) ++static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc) + { ++ struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, qc); ++ struct request *rq = blk_qc_to_rq(hctx, qc); + struct hrtimer_sleeper hs; + enum hrtimer_mode mode; + unsigned int nsecs; + ktime_t kt; + +- if (rq->rq_flags & RQF_MQ_POLL_SLEPT) ++ /* ++ * If a request has completed on queue that uses an I/O scheduler, we ++ * won't get back a request from blk_qc_to_rq. ++ */ ++ if (!rq || (rq->rq_flags & RQF_MQ_POLL_SLEPT)) + return false; + + /* +@@ -4025,32 +4038,48 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q, + + __set_current_state(TASK_RUNNING); + destroy_hrtimer_on_stack(&hs.timer); ++ ++ /* ++ * If we sleep, have the caller restart the poll loop to reset the ++ * state. 
Like for the other success return cases, the caller is ++ * responsible for checking if the IO completed. If the IO isn't ++ * complete, we'll get called again and will go straight to the busy ++ * poll loop. ++ */ + return true; + } + +-static bool blk_mq_poll_hybrid(struct request_queue *q, +- struct blk_mq_hw_ctx *hctx, blk_qc_t cookie) ++static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, ++ bool spin) + { +- struct request *rq; ++ struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie); ++ long state = get_current_state(); ++ int ret; + +- if (q->poll_nsec == BLK_MQ_POLL_CLASSIC) +- return false; ++ hctx->poll_considered++; + +- if (!blk_qc_t_is_internal(cookie)) +- rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie)); +- else { +- rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie)); +- /* +- * With scheduling, if the request has completed, we'll +- * get a NULL return here, as we clear the sched tag when +- * that happens. The request still remains valid, like always, +- * so we should be safe with just the NULL check. +- */ +- if (!rq) +- return false; +- } ++ do { ++ hctx->poll_invoked++; + +- return blk_mq_poll_hybrid_sleep(q, rq); ++ ret = q->mq_ops->poll(hctx); ++ if (ret > 0) { ++ hctx->poll_success++; ++ __set_current_state(TASK_RUNNING); ++ return ret; ++ } ++ ++ if (signal_pending_state(state, current)) ++ __set_current_state(TASK_RUNNING); ++ if (task_is_running(current)) ++ return 1; ++ ++ if (ret < 0 || !spin) ++ break; ++ cpu_relax(); ++ } while (!need_resched()); ++ ++ __set_current_state(TASK_RUNNING); ++ return 0; + } + + /** +@@ -4067,9 +4096,6 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, + */ + int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) + { +- struct blk_mq_hw_ctx *hctx; +- unsigned int state; +- + if (!blk_qc_t_valid(cookie) || + !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + return 0; +@@ -4077,46 +4103,12 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) + if (current->plug) + blk_flush_plug_list(current->plug, false); + +- hctx = blk_qc_to_hctx(q, cookie); +- +- /* +- * If we sleep, have the caller restart the poll loop to reset +- * the state. Like for the other success return cases, the +- * caller is responsible for checking if the IO completed. If +- * the IO isn't complete, we'll get called again and will go +- * straight to the busy poll loop. If specified not to spin, +- * we also should not sleep. +- */ +- if (spin && blk_mq_poll_hybrid(q, hctx, cookie)) +- return 1; +- +- hctx->poll_considered++; +- +- state = get_current_state(); +- do { +- int ret; +- +- hctx->poll_invoked++; +- +- ret = q->mq_ops->poll(hctx); +- if (ret > 0) { +- hctx->poll_success++; +- __set_current_state(TASK_RUNNING); +- return ret; +- } +- +- if (signal_pending_state(state, current)) +- __set_current_state(TASK_RUNNING); +- +- if (task_is_running(current)) ++ /* If specified not to spin, we also should not sleep. 
*/ ++ if (spin && q->poll_nsec != BLK_MQ_POLL_CLASSIC) { ++ if (blk_mq_poll_hybrid(q, cookie)) + return 1; +- if (ret < 0 || !spin) +- break; +- cpu_relax(); +- } while (!need_resched()); +- +- __set_current_state(TASK_RUNNING); +- return 0; ++ } ++ return blk_mq_poll_classic(q, cookie, spin); + } + EXPORT_SYMBOL_GPL(blk_poll); + +-- +2.35.3 + diff --git a/patches.suse/blk-mq-move-blk_mq_flush_plug_list-to-block-blk-mq.h.patch b/patches.suse/blk-mq-move-blk_mq_flush_plug_list-to-block-blk-mq.h.patch new file mode 100644 index 0000000..fd76402 --- /dev/null +++ b/patches.suse/blk-mq-move-blk_mq_flush_plug_list-to-block-blk-mq.h.patch @@ -0,0 +1,46 @@ +From: Christoph Hellwig +Date: Wed, 20 Oct 2021 16:41:17 +0200 +Subject: [PATCH] blk-mq: move blk_mq_flush_plug_list to block/blk-mq.h +Git-commit: dbb6f764a079d1dea883c6f2439d91db4f0fb2f2 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +This helper is internal to the block layer. + +Signed-off-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20211020144119.142582-3-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.h | 1 + + include/linux/blk-mq.h | 2 -- + 2 files changed, 1 insertion(+), 2 deletions(-) + +diff --git a/block/blk-mq.h b/block/blk-mq.h +index d8ccb341e82e..08fb5922e611 100644 +--- a/block/blk-mq.h ++++ b/block/blk-mq.h +@@ -122,6 +122,7 @@ extern int blk_mq_sysfs_register(struct request_queue *q); + extern void blk_mq_sysfs_unregister(struct request_queue *q); + extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); + void blk_mq_free_plug_rqs(struct blk_plug *plug); ++void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); + + void blk_mq_release(struct request_queue *q); + +diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h +index 6cf35de151a9..e13780236550 100644 +--- a/include/linux/blk-mq.h ++++ b/include/linux/blk-mq.h +@@ -656,8 +656,6 @@ int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set, + unsigned int set_flags); + void blk_mq_free_tag_set(struct blk_mq_tag_set *set); + +-void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); +- + void blk_mq_free_request(struct request *rq); + + bool blk_mq_queue_inflight(struct request_queue *q); +-- +2.35.3 + diff --git a/patches.suse/blk-mq-only-flush-requests-from-the-plug-in-blk_mq_s.patch b/patches.suse/blk-mq-only-flush-requests-from-the-plug-in-blk_mq_s.patch new file mode 100644 index 0000000..c986060 --- /dev/null +++ b/patches.suse/blk-mq-only-flush-requests-from-the-plug-in-blk_mq_s.patch @@ -0,0 +1,38 @@ +From: Christoph Hellwig +Date: Wed, 20 Oct 2021 16:41:16 +0200 +Subject: [PATCH] blk-mq: only flush requests from the plug in + blk_mq_submit_bio +Git-commit: a214b949d8e365583dd67441f6f608f0b20f7f52 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Replace the call to blk_flush_plug_list in blk_mq_submit_bio with a +direct call to blk_mq_flush_plug_list. This means we do not flush +plug callbacks from stackable devices, which doesn't really help with +the accumulated requests anyway, and it also means the cached requests +aren't freed here as they can still be used later on. 
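+
+As an illustrative sketch (not part of the patch itself), the practical
+difference between the two calls is roughly:
+
+	/* runs any registered plug callbacks, then flushes plugged requests */
+	blk_flush_plug_list(plug, false);
+
+	/* dispatches only the requests held on the plug list itself */
+	blk_mq_flush_plug_list(plug, false);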
+ +Signed-off-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20211020144119.142582-2-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index a71aeed7b987..101466ece4c4 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2568,7 +2568,7 @@ void blk_mq_submit_bio(struct bio *bio) + } + + if (request_count >= blk_plug_max_rq_count(plug) || last) { +- blk_flush_plug_list(plug, false); ++ blk_mq_flush_plug_list(plug, false); + trace_block_plug(q); + } + +-- +2.35.3 + diff --git a/patches.suse/blk-mq-remove-blk_qc_t_to_tag-and-blk_qc_t_is_intern.patch b/patches.suse/blk-mq-remove-blk_qc_t_to_tag-and-blk_qc_t_is_intern.patch new file mode 100644 index 0000000..ad1f751 --- /dev/null +++ b/patches.suse/blk-mq-remove-blk_qc_t_to_tag-and-blk_qc_t_is_intern.patch @@ -0,0 +1,64 @@ +From: Christoph Hellwig +Date: Tue, 12 Oct 2021 13:12:17 +0200 +Subject: [PATCH] blk-mq: remove blk_qc_t_to_tag and blk_qc_t_is_internal +Git-commit: efbabbe121f96d4b1a98a2c2ef5d2e8f7fb41c87 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Merge both functions into their only caller to keep the blk-mq tag to +blk_qc_t mapping as private as possible in blk-mq.c. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Sagi Grimberg +Tested-by: Mark Wunderlich +Link: https://lore.kernel.org/r/20211012111226.760968-8-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 8 +++++--- + include/linux/blk_types.h | 10 ---------- + 2 files changed, 5 insertions(+), 13 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 636773056874..24a10c973f79 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -74,9 +74,11 @@ static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q, + static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx, + blk_qc_t qc) + { +- if (blk_qc_t_is_internal(qc)) +- return blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(qc)); +- return blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(qc)); ++ unsigned int tag = qc & ((1U << BLK_QC_T_SHIFT) - 1); ++ ++ if (qc & BLK_QC_T_INTERNAL) ++ return blk_mq_tag_to_rq(hctx->sched_tags, tag); ++ return blk_mq_tag_to_rq(hctx->tags, tag); + } + + /* +diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h +index 000351c5312a..fb7c1477617b 100644 +--- a/include/linux/blk_types.h ++++ b/include/linux/blk_types.h +@@ -505,16 +505,6 @@ static inline bool blk_qc_t_valid(blk_qc_t cookie) + return cookie != BLK_QC_T_NONE; + } + +-static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) +-{ +- return cookie & ((1u << BLK_QC_T_SHIFT) - 1); +-} +- +-static inline bool blk_qc_t_is_internal(blk_qc_t cookie) +-{ +- return (cookie & BLK_QC_T_INTERNAL) != 0; +-} +- + struct blk_rq_stat { + u64 mean; + u64 min; +-- +2.35.3 + diff --git a/patches.suse/blk-mq-remove-blk_qc_t_valid.patch b/patches.suse/blk-mq-remove-blk_qc_t_valid.patch new file mode 100644 index 0000000..6143b98 --- /dev/null +++ b/patches.suse/blk-mq-remove-blk_qc_t_valid.patch @@ -0,0 +1,53 @@ +From: Christoph Hellwig +Date: Tue, 12 Oct 2021 13:12:18 +0200 +Subject: [PATCH] blk-mq: remove blk_qc_t_valid +Git-commit: 28a1ae6b9daba6ac65700eeb38479bd6fadec089 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Move the trivial check into the only caller. 
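+
+After this change the helper's body is simply open-coded at the only
+call site (sketch mirroring the hunk below):
+
+	if (cookie == BLK_QC_T_NONE ||
+	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+		return 0;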
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Sagi Grimberg +Reviewed-by: Chaitanya Kulkarni +Tested-by: Mark Wunderlich +Link: https://lore.kernel.org/r/20211012111226.760968-9-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 2 +- + include/linux/blk_types.h | 5 ----- + 2 files changed, 1 insertion(+), 6 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 24a10c973f79..7d0d947921a6 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -4098,7 +4098,7 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, + */ + int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) + { +- if (!blk_qc_t_valid(cookie) || ++ if (cookie == BLK_QC_T_NONE || + !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + return 0; + +diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h +index fb7c1477617b..5017ba8fc539 100644 +--- a/include/linux/blk_types.h ++++ b/include/linux/blk_types.h +@@ -500,11 +500,6 @@ typedef unsigned int blk_qc_t; + #define BLK_QC_T_SHIFT 16 + #define BLK_QC_T_INTERNAL (1U << 31) + +-static inline bool blk_qc_t_valid(blk_qc_t cookie) +-{ +- return cookie != BLK_QC_T_NONE; +-} +- + struct blk_rq_stat { + u64 mean; + u64 min; +-- +2.35.3 + diff --git a/patches.suse/blk-mq-sched-Don-t-reference-queue-tagset-in-blk_mq_.patch b/patches.suse/blk-mq-sched-Don-t-reference-queue-tagset-in-blk_mq_.patch new file mode 100644 index 0000000..5525a11 --- /dev/null +++ b/patches.suse/blk-mq-sched-Don-t-reference-queue-tagset-in-blk_mq_.patch @@ -0,0 +1,44 @@ +From: John Garry +Date: Fri, 22 Oct 2021 16:12:20 +0800 +Subject: [PATCH] blk-mq-sched: Don't reference queue tagset in + blk_mq_sched_tags_teardown() +Git-commit: 8bdf7b3fe1f48a2c1c212d4685903bba01409c0e +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We should not reference the queue tagset in blk_mq_sched_tags_teardown() +(see function comment) for the blk-mq flags, so use the passed flags +instead. + +This solves a use-after-free, similarly fixed earlier (and since broken +again) in commit f0c1c4d2864e ("blk-mq: fix use-after-free in +blk_mq_exit_sched"). 
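+
+Illustrative sketch of the fix (not part of the original patch text):
+the tag_set reachable through @q may already be gone during teardown,
+so the shared-tags test has to use the flags the caller sampled earlier:
+
+	if (!blk_mq_is_shared_tags(flags))	/* not q->tag_set->flags */
+		blk_mq_free_rq_map(hctx->sched_tags);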
+ +Reported-by: Linux Kernel Functional Testing +Reported-by: Naresh Kamboju +Tested-by: Anders Roxell +Fixes: e155b0c238b2 ("blk-mq: Use shared tags for shared sbitmap support") +Signed-off-by: John Garry +Link: https://lore.kernel.org/r/1634890340-15432-1-git-send-email-john.garry@huawei.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq-sched.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c +index 5b259fdea794..c62b966dfaba 100644 +--- a/block/blk-mq-sched.c ++++ b/block/blk-mq-sched.c +@@ -541,7 +541,7 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int fla + + queue_for_each_hw_ctx(q, hctx, i) { + if (hctx->sched_tags) { +- if (!blk_mq_is_shared_tags(q->tag_set->flags)) ++ if (!blk_mq_is_shared_tags(flags)) + blk_mq_free_rq_map(hctx->sched_tags); + hctx->sched_tags = NULL; + } +-- +2.35.3 + diff --git a/patches.suse/blk-wbt-prevent-NULL-pointer-dereference-in-wb_timer.patch b/patches.suse/blk-wbt-prevent-NULL-pointer-dereference-in-wb_timer.patch new file mode 100644 index 0000000..dcfdccd --- /dev/null +++ b/patches.suse/blk-wbt-prevent-NULL-pointer-dereference-in-wb_timer.patch @@ -0,0 +1,77 @@ +From: Andrea Righi +Date: Tue, 19 Oct 2021 11:20:26 +0200 +Subject: [PATCH] blk-wbt: prevent NULL pointer dereference in wb_timer_fn +Git-commit: 480d42dc001bbfe953825a92073012fcd5a99161 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +The timer callback used to evaluate if the latency is exceeded can be +executed after the corresponding disk has been released, causing the +following NULL pointer dereference: + +[ 119.987108] BUG: kernel NULL pointer dereference, address: 0000000000000098 +[ 119.987617] #PF: supervisor read access in kernel mode +[ 119.987971] #PF: error_code(0x0000) - not-present page +[ 119.988325] PGD 7c4a4067 P4D 7c4a4067 PUD 7bf63067 PMD 0 +[ 119.988697] Oops: 0000 [#1] SMP NOPTI +[ 119.988959] CPU: 1 PID: 9353 Comm: cloud-init Not tainted 5.15-rc5+arighi #rc5+arighi +[ 119.989520] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-2 04/01/2014 +[ 119.990055] RIP: 0010:wb_timer_fn+0x44/0x3c0 +[ 119.990376] Code: 41 8b 9c 24 98 00 00 00 41 8b 94 24 b8 00 00 00 41 8b 84 24 d8 00 00 00 4d 8b 74 24 28 01 d3 01 c3 49 8b 44 24 60 48 8b 40 78 <4c> 8b b8 98 00 00 00 4d 85 f6 0f 84 c4 00 00 00 49 83 7c 24 30 00 +[ 119.991578] RSP: 0000:ffffb5f580957da8 EFLAGS: 00010246 +[ 119.991937] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000004 +[ 119.992412] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88f476d7f780 +[ 119.992895] RBP: ffffb5f580957dd0 R08: 0000000000000000 R09: 0000000000000000 +[ 119.993371] R10: 0000000000000004 R11: 0000000000000002 R12: ffff88f476c84500 +[ 119.993847] R13: ffff88f4434390c0 R14: 0000000000000000 R15: ffff88f4bdc98c00 +[ 119.994323] FS: 00007fb90bcd9c00(0000) GS:ffff88f4bdc80000(0000) knlGS:0000000000000000 +[ 119.994952] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 119.995380] CR2: 0000000000000098 CR3: 000000007c0d6000 CR4: 00000000000006e0 +[ 119.995906] Call Trace: +[ 119.996130] ? blk_stat_free_callback_rcu+0x30/0x30 +[ 119.996505] blk_stat_timer_fn+0x138/0x140 +[ 119.996830] call_timer_fn+0x2b/0x100 +[ 119.997136] __run_timers.part.0+0x1d1/0x240 +[ 119.997470] ? kvm_clock_get_cycles+0x11/0x20 +[ 119.997826] ? ktime_get+0x3e/0xa0 +[ 119.998110] ? native_apic_msr_write+0x2c/0x30 +[ 119.998456] ? lapic_next_event+0x20/0x30 +[ 119.998779] ? 
clockevents_program_event+0x94/0xf0 +[ 119.999150] run_timer_softirq+0x2a/0x50 +[ 119.999465] __do_softirq+0xcb/0x26f +[ 119.999764] irq_exit_rcu+0x8c/0xb0 +[ 120.000057] sysvec_apic_timer_interrupt+0x43/0x90 +[ 120.000429] ? asm_sysvec_apic_timer_interrupt+0xa/0x20 +[ 120.000836] asm_sysvec_apic_timer_interrupt+0x12/0x20 + +In this case simply return from the timer callback (no action +required) to prevent the NULL pointer dereference. + +BugLink: https://bugs.launchpad.net/bugs/1947557 +Link: https://lore.kernel.org/linux-mm/YWRNVTk9N8K0RMst@arighi-desktop/ +Fixes: 34dbad5d26e2 ("blk-stat: convert to callback-based statistics reporting") +Signed-off-by: Andrea Righi +Link: https://lore.kernel.org/r/YW6N2qXpBU3oc50q@arighi-desktop +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-wbt.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/block/blk-wbt.c b/block/blk-wbt.c +index 874c1c37bf0c..0c119be0e813 100644 +--- a/block/blk-wbt.c ++++ b/block/blk-wbt.c +@@ -357,6 +357,9 @@ static void wb_timer_fn(struct blk_stat_callback *cb) + unsigned int inflight = wbt_inflight(rwb); + int status; + ++ if (!rwb->rqos.q->disk) ++ return; ++ + status = latency_exceeded(rwb, cb->stat); + + trace_wbt_timer(rwb->rqos.q->disk->bdi, status, rqd->scale_step, +-- +2.35.3 + diff --git a/patches.suse/block-Add-independent-access-ranges-support.patch b/patches.suse/block-Add-independent-access-ranges-support.patch new file mode 100644 index 0000000..8550e9c --- /dev/null +++ b/patches.suse/block-Add-independent-access-ranges-support.patch @@ -0,0 +1,588 @@ +From: Damien Le Moal +Date: Wed, 27 Oct 2021 11:22:19 +0900 +Subject: [PATCH] block: Add independent access ranges support +Git-commit: a2247f19ee1c5ad75ef095cdfb909a3244b88aa8 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +The Concurrent Positioning Ranges VPD page (for SCSI) and data log page +(for ATA) contain parameters describing the set of contiguous LBAs that +can be served independently by a single LUN multi-actuator hard-disk. +Similarly, a logically defined block device composed of multiple disks +can in some cases execute requests directed at different sector ranges +in parallel. A dm-linear device aggregating 2 block devices together is +an example. + +This patch implements support for exposing a block device independent +access ranges to the user through sysfs to allow optimizing device +accesses to increase performance. + +To describe the set of independent sector ranges of a device (actuators +of a multi-actuator HDDs or table entries of a dm-linear device), +The type struct blk_independent_access_ranges is introduced. This +structure describes the sector ranges using an array of +struct blk_independent_access_range structures. This range structure +defines the start sector and number of sectors of the access range. +The ranges in the array cannot overlap and must contain all sectors +within the device capacity. + +The function disk_set_independent_access_ranges() allows a device +driver to signal to the block layer that a device has multiple +independent access ranges. In this case, a struct +blk_independent_access_ranges is attached to the device request queue +by the function disk_set_independent_access_ranges(). The function +disk_alloc_independent_access_ranges() is provided for drivers to +allocate this structure. + +struct blk_independent_access_ranges contains kobjects (struct kobject) +to expose to the user through sysfs the set of independent access ranges +supported by a device. 
When the device is initialized, sysfs +registration of the ranges information is done from blk_register_queue() +using the block layer internal function +disk_register_independent_access_ranges(). If a driver calls +disk_set_independent_access_ranges() for a registered queue, e.g. when a +device is revalidated, disk_set_independent_access_ranges() will execute +disk_register_independent_access_ranges() to update the sysfs attribute +files. The sysfs file structure created starts from the +independent_access_ranges sub-directory and contains the start sector +and number of sectors of each range, with the information for each range +grouped in numbered sub-directories. + +E.g. for a dual actuator HDD, the user sees: + +$ tree /sys/block/sdk/queue/independent_access_ranges/ +/sys/block/sdk/queue/independent_access_ranges/ +|-- 0 +| |-- nr_sectors +| `-- sector +`-- 1 + |-- nr_sectors + `-- sector + +For a regular device with a single access range, the +independent_access_ranges sysfs directory does not exist. + +Device revalidation may lead to changes to this structure and to the +attribute values. When manipulated, the queue sysfs_lock and +sysfs_dir_lock mutexes are held for atomicity, similarly to how the +blk-mq and elevator sysfs queue sub-directories are protected. + +The code related to the management of independent access ranges is +added in the new file block/blk-ia-ranges.c. + +Signed-off-by: Damien Le Moal +Reviewed-by: Hannes Reinecke +Reviewed-by: Martin K. Petersen +Reviewed-by: Keith Busch +Link: https://lore.kernel.org/r/20211027022223.183838-2-damien.lemoal@wdc.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/Makefile | 2 +- + block/blk-ia-ranges.c | 348 +++++++++++++++++++++++++++++++++++++++++ + block/blk-sysfs.c | 26 ++- + block/blk.h | 4 + + include/linux/blkdev.h | 39 +++++ + 5 files changed, 410 insertions(+), 9 deletions(-) + create mode 100644 block/blk-ia-ranges.c + +diff --git a/block/Makefile b/block/Makefile +index 602f7f47b7b6..44df57e562bf 100644 +--- a/block/Makefile ++++ b/block/Makefile +@@ -9,7 +9,7 @@ obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \ + blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ + blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \ + genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \ +- disk-events.o ++ disk-events.o blk-ia-ranges.o + + obj-$(CONFIG_BOUNCE) += bounce.o + obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o +diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c +new file mode 100644 +index 000000000000..c246c425d0d7 +--- /dev/null ++++ b/block/blk-ia-ranges.c +@@ -0,0 +1,348 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Block device concurrent positioning ranges. ++ * ++ * Copyright (C) 2021 Western Digital Corporation or its Affiliates. 
++ */ ++#include <linux/kernel.h> ++#include <linux/blkdev.h> ++#include <linux/slab.h> ++#include <linux/init.h> ++ ++#include "blk.h" ++ ++static ssize_t ++blk_ia_range_sector_show(struct blk_independent_access_range *iar, ++ char *buf) ++{ ++ return sprintf(buf, "%llu\n", iar->sector); ++} ++ ++static ssize_t ++blk_ia_range_nr_sectors_show(struct blk_independent_access_range *iar, ++ char *buf) ++{ ++ return sprintf(buf, "%llu\n", iar->nr_sectors); ++} ++ ++struct blk_ia_range_sysfs_entry { ++ struct attribute attr; ++ ssize_t (*show)(struct blk_independent_access_range *iar, char *buf); ++}; ++ ++static struct blk_ia_range_sysfs_entry blk_ia_range_sector_entry = { ++ .attr = { .name = "sector", .mode = 0444 }, ++ .show = blk_ia_range_sector_show, ++}; ++ ++static struct blk_ia_range_sysfs_entry blk_ia_range_nr_sectors_entry = { ++ .attr = { .name = "nr_sectors", .mode = 0444 }, ++ .show = blk_ia_range_nr_sectors_show, ++}; ++ ++static struct attribute *blk_ia_range_attrs[] = { ++ &blk_ia_range_sector_entry.attr, ++ &blk_ia_range_nr_sectors_entry.attr, ++ NULL, ++}; ++ATTRIBUTE_GROUPS(blk_ia_range); ++ ++static ssize_t blk_ia_range_sysfs_show(struct kobject *kobj, ++ struct attribute *attr, char *buf) ++{ ++ struct blk_ia_range_sysfs_entry *entry = ++ container_of(attr, struct blk_ia_range_sysfs_entry, attr); ++ struct blk_independent_access_range *iar = ++ container_of(kobj, struct blk_independent_access_range, kobj); ++ ssize_t ret; ++ ++ mutex_lock(&iar->queue->sysfs_lock); ++ ret = entry->show(iar, buf); ++ mutex_unlock(&iar->queue->sysfs_lock); ++ ++ return ret; ++} ++ ++static const struct sysfs_ops blk_ia_range_sysfs_ops = { ++ .show = blk_ia_range_sysfs_show, ++}; ++ ++/* ++ * Independent access range entries are not freed individually, but together ++ * with struct blk_independent_access_ranges and its array of ranges. Since ++ * kobject_add() takes a reference on the parent kobject contained in ++ * struct blk_independent_access_ranges, the array of independent access range ++ * entries cannot be freed until kobject_del() is called for all entries. ++ * So we do not need to do anything here, but still need this no-op release ++ * operation to avoid complaints from the kobject code. ++ */ ++static void blk_ia_range_sysfs_nop_release(struct kobject *kobj) ++{ ++} ++ ++static struct kobj_type blk_ia_range_ktype = { ++ .sysfs_ops = &blk_ia_range_sysfs_ops, ++ .default_groups = blk_ia_range_groups, ++ .release = blk_ia_range_sysfs_nop_release, ++}; ++ ++/* ++ * This will be executed only after all independent access range entries are ++ * removed with kobject_del(), at which point, it is safe to free everything, ++ * including the array of ranges. ++ */ ++static void blk_ia_ranges_sysfs_release(struct kobject *kobj) ++{ ++ struct blk_independent_access_ranges *iars = ++ container_of(kobj, struct blk_independent_access_ranges, kobj); ++ ++ kfree(iars); ++} ++ ++static struct kobj_type blk_ia_ranges_ktype = { ++ .release = blk_ia_ranges_sysfs_release, ++}; ++ ++/** ++ * disk_register_independent_access_ranges - register with sysfs a set of ++ * independent access ranges ++ * @disk: Target disk ++ * @new_iars: New set of independent access ranges ++ * ++ * Register with sysfs a set of independent access ranges for @disk. ++ * If @new_iars is not NULL, this set of ranges is registered and the old set ++ * specified by q->ia_ranges is unregistered. Otherwise, q->ia_ranges is ++ * registered if it is not already. 
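++ *
++ * Return: 0 on success, or a negative error code if registering the
++ * range kobjects with sysfs fails.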
++ */ ++int disk_register_independent_access_ranges(struct gendisk *disk, ++ struct blk_independent_access_ranges *new_iars) ++{ ++ struct request_queue *q = disk->queue; ++ struct blk_independent_access_ranges *iars; ++ int i, ret; ++ ++ lockdep_assert_held(&q->sysfs_dir_lock); ++ lockdep_assert_held(&q->sysfs_lock); ++ ++ /* If a new range set is specified, unregister the old one */ ++ if (new_iars) { ++ if (q->ia_ranges) ++ disk_unregister_independent_access_ranges(disk); ++ q->ia_ranges = new_iars; ++ } ++ ++ iars = q->ia_ranges; ++ if (!iars) ++ return 0; ++ ++ /* ++ * At this point, iars is the new set of sector access ranges that needs ++ * to be registered with sysfs. ++ */ ++ WARN_ON(iars->sysfs_registered); ++ ret = kobject_init_and_add(&iars->kobj, &blk_ia_ranges_ktype, ++ &q->kobj, "%s", "independent_access_ranges"); ++ if (ret) { ++ q->ia_ranges = NULL; ++ kfree(iars); ++ return ret; ++ } ++ ++ for (i = 0; i < iars->nr_ia_ranges; i++) { ++ iars->ia_range[i].queue = q; ++ ret = kobject_init_and_add(&iars->ia_range[i].kobj, ++ &blk_ia_range_ktype, &iars->kobj, ++ "%d", i); ++ if (ret) { ++ while (--i >= 0) ++ kobject_del(&iars->ia_range[i].kobj); ++ kobject_del(&iars->kobj); ++ kobject_put(&iars->kobj); ++ return ret; ++ } ++ } ++ ++ iars->sysfs_registered = true; ++ ++ return 0; ++} ++ ++void disk_unregister_independent_access_ranges(struct gendisk *disk) ++{ ++ struct request_queue *q = disk->queue; ++ struct blk_independent_access_ranges *iars = q->ia_ranges; ++ int i; ++ ++ lockdep_assert_held(&q->sysfs_dir_lock); ++ lockdep_assert_held(&q->sysfs_lock); ++ ++ if (!iars) ++ return; ++ ++ if (iars->sysfs_registered) { ++ for (i = 0; i < iars->nr_ia_ranges; i++) ++ kobject_del(&iars->ia_range[i].kobj); ++ kobject_del(&iars->kobj); ++ kobject_put(&iars->kobj); ++ } else { ++ kfree(iars); ++ } ++ ++ q->ia_ranges = NULL; ++} ++ ++static struct blk_independent_access_range * ++disk_find_ia_range(struct blk_independent_access_ranges *iars, ++ sector_t sector) ++{ ++ struct blk_independent_access_range *iar; ++ int i; ++ ++ for (i = 0; i < iars->nr_ia_ranges; i++) { ++ iar = &iars->ia_range[i]; ++ if (sector >= iar->sector && ++ sector < iar->sector + iar->nr_sectors) ++ return iar; ++ } ++ ++ return NULL; ++} ++ ++static bool disk_check_ia_ranges(struct gendisk *disk, ++ struct blk_independent_access_ranges *iars) ++{ ++ struct blk_independent_access_range *iar, *tmp; ++ sector_t capacity = get_capacity(disk); ++ sector_t sector = 0; ++ int i; ++ ++ /* ++ * While sorting the ranges in increasing LBA order, check that the ++ * ranges do not overlap, that there are no sector holes and that all ++ * sectors belong to one range. 
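++ * E.g. (illustrative): with a 1000 sector capacity, the pair {0..499}
++ * and {500..999} is valid, while {0..399} and {500..999} would leave a
++ * sector hole and must be rejected.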
++ */ ++ for (i = 0; i < iars->nr_ia_ranges; i++) { ++ tmp = disk_find_ia_range(iars, sector); ++ if (!tmp || tmp->sector != sector) { ++ pr_warn("Invalid non-contiguous independent access ranges\n"); ++ return false; ++ } ++ ++ iar = &iars->ia_range[i]; ++ if (tmp != iar) { ++ swap(iar->sector, tmp->sector); ++ swap(iar->nr_sectors, tmp->nr_sectors); ++ } ++ ++ sector += iar->nr_sectors; ++ } ++ ++ if (sector != capacity) { ++ pr_warn("Independent access ranges do not match disk capacity\n"); ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool disk_ia_ranges_changed(struct gendisk *disk, ++ struct blk_independent_access_ranges *new) ++{ ++ struct blk_independent_access_ranges *old = disk->queue->ia_ranges; ++ int i; ++ ++ if (!old) ++ return true; ++ ++ if (old->nr_ia_ranges != new->nr_ia_ranges) ++ return true; ++ ++ for (i = 0; i < old->nr_ia_ranges; i++) { ++ if (new->ia_range[i].sector != old->ia_range[i].sector || ++ new->ia_range[i].nr_sectors != old->ia_range[i].nr_sectors) ++ return true; ++ } ++ ++ return false; ++} ++ ++/** ++ * disk_alloc_independent_access_ranges - Allocate an independent access ranges ++ * data structure ++ * @disk: target disk ++ * @nr_ia_ranges: Number of independent access ranges ++ * ++ * Allocate a struct blk_independent_access_ranges structure with @nr_ia_ranges ++ * access range descriptors. ++ */ ++struct blk_independent_access_ranges * ++disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges) ++{ ++ struct blk_independent_access_ranges *iars; ++ ++ iars = kzalloc_node(struct_size(iars, ia_range, nr_ia_ranges), ++ GFP_KERNEL, disk->queue->node); ++ if (iars) ++ iars->nr_ia_ranges = nr_ia_ranges; ++ return iars; ++} ++EXPORT_SYMBOL_GPL(disk_alloc_independent_access_ranges); ++ ++/** ++ * disk_set_independent_access_ranges - Set a disk's independent access ranges ++ * @disk: target disk ++ * @iars: independent access ranges structure ++ * ++ * Set the independent access ranges information of the request queue ++ * of @disk to @iars. If @iars is NULL, the independent access ranges ++ * structure already set is cleared. If there are no differences between ++ * @iars and the independent access ranges structure already set, @iars ++ * is freed. ++ */ ++void disk_set_independent_access_ranges(struct gendisk *disk, ++ struct blk_independent_access_ranges *iars) ++{ ++ struct request_queue *q = disk->queue; ++ ++ if (WARN_ON_ONCE(iars && !iars->nr_ia_ranges)) { ++ kfree(iars); ++ iars = NULL; ++ } ++ ++ mutex_lock(&q->sysfs_dir_lock); ++ mutex_lock(&q->sysfs_lock); ++ ++ if (iars) { ++ if (!disk_check_ia_ranges(disk, iars)) { ++ kfree(iars); ++ iars = NULL; ++ goto reg; ++ } ++ ++ if (!disk_ia_ranges_changed(disk, iars)) { ++ kfree(iars); ++ goto unlock; ++ } ++ } ++ ++ /* ++ * This may be called for a registered queue, e.g. during a device ++ * revalidation. If that is the case, we need to unregister the old ++ * set of independent access ranges and register the new set. If the ++ * queue is not registered, registration of the device request queue ++ * will register the independent access ranges, so only swap in the ++ * new set and free the old one. 
++ */ ++reg: ++ if (blk_queue_registered(q)) { ++ disk_register_independent_access_ranges(disk, iars); ++ } else { ++ swap(q->ia_ranges, iars); ++ kfree(iars); ++ } ++ ++unlock: ++ mutex_unlock(&q->sysfs_lock); ++ mutex_unlock(&q->sysfs_dir_lock); ++} ++EXPORT_SYMBOL_GPL(disk_set_independent_access_ranges); +diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c +index 36f14d658e81..cef1f713370b 100644 +--- a/block/blk-sysfs.c ++++ b/block/blk-sysfs.c +@@ -873,16 +873,15 @@ int blk_register_queue(struct gendisk *disk) + } + + mutex_lock(&q->sysfs_lock); ++ ++ ret = disk_register_independent_access_ranges(disk, NULL); ++ if (ret) ++ goto put_dev; ++ + if (q->elevator) { + ret = elv_register_queue(q, false); +- if (ret) { +- mutex_unlock(&q->sysfs_lock); +- mutex_unlock(&q->sysfs_dir_lock); +- kobject_del(&q->kobj); +- blk_trace_remove_sysfs(dev); +- kobject_put(&dev->kobj); +- return ret; +- } ++ if (ret) ++ goto put_dev; + } + + blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); +@@ -913,6 +912,16 @@ int blk_register_queue(struct gendisk *disk) + percpu_ref_switch_to_percpu(&q->q_usage_counter); + } + ++ return ret; ++ ++put_dev: ++ disk_unregister_independent_access_ranges(disk); ++ mutex_unlock(&q->sysfs_lock); ++ mutex_unlock(&q->sysfs_dir_lock); ++ kobject_del(&q->kobj); ++ blk_trace_remove_sysfs(dev); ++ kobject_put(&dev->kobj); ++ + return ret; + } + +@@ -958,6 +967,7 @@ void blk_unregister_queue(struct gendisk *disk) + mutex_lock(&q->sysfs_lock); + if (q->elevator) + elv_unregister_queue(q); ++ disk_unregister_independent_access_ranges(disk); + mutex_unlock(&q->sysfs_lock); + mutex_unlock(&q->sysfs_dir_lock); + +diff --git a/block/blk.h b/block/blk.h +index 6a039e6c7d07..7afffd548daf 100644 +--- a/block/blk.h ++++ b/block/blk.h +@@ -454,4 +454,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); + + extern const struct address_space_operations def_blk_aops; + ++int disk_register_independent_access_ranges(struct gendisk *disk, ++ struct blk_independent_access_ranges *new_iars); ++void disk_unregister_independent_access_ranges(struct gendisk *disk); ++ + #endif /* BLK_INTERNAL_H */ +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index f72ccb2829db..6d95a4b36cfa 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -150,6 +150,34 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev, + + #endif /* CONFIG_BLK_DEV_ZONED */ + ++/* ++ * Independent access ranges: struct blk_independent_access_range describes ++ * a range of contiguous sectors that can be accessed using device command ++ * execution resources that are independent from the resources used for ++ * other access ranges. This is typically found with single-LUN multi-actuator ++ * HDDs where each access range is served by a different set of heads. ++ * The set of independent ranges supported by the device is defined using ++ * struct blk_independent_access_ranges. The independent ranges must not overlap ++ * and must include all sectors within the disk capacity (no sector holes ++ * allowed). ++ * For a device with multiple ranges, requests targeting sectors in different ++ * ranges can be executed in parallel. A request can straddle an access range ++ * boundary. 
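++ * E.g. (illustrative): a dual-actuator HDD of capacity C could expose
++ *   range 0: sector = 0,     nr_sectors = C / 2
++ *   range 1: sector = C / 2, nr_sectors = C - C / 2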
++ */ ++struct blk_independent_access_range { ++ struct kobject kobj; ++ struct request_queue *queue; ++ sector_t sector; ++ sector_t nr_sectors; ++}; ++ ++struct blk_independent_access_ranges { ++ struct kobject kobj; ++ bool sysfs_registered; ++ unsigned int nr_ia_ranges; ++ struct blk_independent_access_range ia_range[]; ++}; ++ + struct request_queue { + struct request *last_merge; + struct elevator_queue *elevator; +@@ -331,6 +359,12 @@ struct request_queue { + + #define BLK_MAX_WRITE_HINTS 5 + u64 write_hints[BLK_MAX_WRITE_HINTS]; ++ ++ /* ++ * Independent sector access ranges. This is always NULL for ++ * devices that do not have multiple independent access ranges. ++ */ ++ struct blk_independent_access_ranges *ia_ranges; + }; + + /* Keep blk_queue_flag_name[] in sync with the definitions below */ +@@ -698,6 +732,11 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int); + extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); + extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); + ++struct blk_independent_access_ranges * ++disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges); ++void disk_set_independent_access_ranges(struct gendisk *disk, ++ struct blk_independent_access_ranges *iars); ++ + /* + * Elevator features for blk_queue_required_elevator_features: + */ +-- +2.35.3 + diff --git a/patches.suse/block-Add-invalidate_disk-helper-to-invalidate-the-g.patch b/patches.suse/block-Add-invalidate_disk-helper-to-invalidate-the-g.patch new file mode 100644 index 0000000..0e82068 --- /dev/null +++ b/patches.suse/block-Add-invalidate_disk-helper-to-invalidate-the-g.patch @@ -0,0 +1,69 @@ +From: Xie Yongji +Date: Wed, 22 Sep 2021 20:37:08 +0800 +Subject: [PATCH] block: Add invalidate_disk() helper to invalidate the gendisk +Git-commit: f059a1d2e23a165bf86e33673c6a7535a08c6341 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +To hide internal implementation and simplify some driver code, +this adds a helper to invalidate the gendisk. It will clean the +gendisk's associated buffer/page caches and reset its internal +states. + +Signed-off-by: Xie Yongji +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20210922123711.187-2-xieyongji@bytedance.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/genhd.c | 20 ++++++++++++++++++++ + include/linux/genhd.h | 2 ++ + 2 files changed, 22 insertions(+) + +diff --git a/block/genhd.c b/block/genhd.c +index 80943c123c3e..64f83c4aee99 100644 +--- a/block/genhd.c ++++ b/block/genhd.c +@@ -624,6 +624,26 @@ void del_gendisk(struct gendisk *disk) + } + EXPORT_SYMBOL(del_gendisk); + ++/** ++ * invalidate_disk - invalidate the disk ++ * @disk: the struct gendisk to invalidate ++ * ++ * A helper to invalidates the disk. It will clean the disk's associated ++ * buffer/page caches and reset its internal states so that the disk ++ * can be reused by the drivers. ++ * ++ * Context: can sleep ++ */ ++void invalidate_disk(struct gendisk *disk) ++{ ++ struct block_device *bdev = disk->part0; ++ ++ invalidate_bdev(bdev); ++ bdev->bd_inode->i_mapping->wb_err = 0; ++ set_capacity(disk, 0); ++} ++EXPORT_SYMBOL(invalidate_disk); ++ + /* sysfs access to bad-blocks list. 
*/ + static ssize_t disk_badblocks_show(struct device *dev, + struct device_attribute *attr, +diff --git a/include/linux/genhd.h b/include/linux/genhd.h +index c70bc5fce4db..13f313ab99e7 100644 +--- a/include/linux/genhd.h ++++ b/include/linux/genhd.h +@@ -213,6 +213,8 @@ static inline int add_disk(struct gendisk *disk) + } + extern void del_gendisk(struct gendisk *gp); + ++void invalidate_disk(struct gendisk *disk); ++ + void set_disk_ro(struct gendisk *disk, bool read_only); + + static inline int get_disk_ro(struct gendisk *disk) +-- +2.35.3 + diff --git a/patches.suse/block-Hold-invalidate_lock-in-BLKDISCARD-ioctl.patch b/patches.suse/block-Hold-invalidate_lock-in-BLKDISCARD-ioctl.patch index 9868553..70dfd88 100644 --- a/patches.suse/block-Hold-invalidate_lock-in-BLKDISCARD-ioctl.patch +++ b/patches.suse/block-Hold-invalidate_lock-in-BLKDISCARD-ioctl.patch @@ -39,7 +39,7 @@ index 24beec9ca..a973a79c0 100644 if (!(mode & FMODE_WRITE)) @@ -134,12 +135,17 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, - if (start + len > i_size_read(bdev->bd_inode)) + if (start + len > bdev_nr_bytes(bdev)) return -EINVAL; + filemap_invalidate_lock(inode->i_mapping); diff --git a/patches.suse/block-add-a-bdev_nr_bytes-helper.patch b/patches.suse/block-add-a-bdev_nr_bytes-helper.patch new file mode 100644 index 0000000..bc83744 --- /dev/null +++ b/patches.suse/block-add-a-bdev_nr_bytes-helper.patch @@ -0,0 +1,42 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:02 +0200 +Subject: [PATCH] block: add a bdev_nr_bytes helper +Git-commit: 6436bd90f76e75d2c5786a50203b05a9b7f7100d +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Add a helper to query the size of a block device in bytes. This +will be used to remove open coded access to ->bd_inode. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Link: https://lore.kernel.org/r/20211018101130.1838532-3-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + include/linux/genhd.h | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/include/linux/genhd.h b/include/linux/genhd.h +index cd4038fd5743..01d27f3a970e 100644 +--- a/include/linux/genhd.h ++++ b/include/linux/genhd.h +@@ -236,9 +236,14 @@ static inline sector_t get_start_sect(struct block_device *bdev) + return bdev->bd_start_sect; + } + ++static inline loff_t bdev_nr_bytes(struct block_device *bdev) ++{ ++ return i_size_read(bdev->bd_inode); ++} ++ + static inline sector_t bdev_nr_sectors(struct block_device *bdev) + { +- return i_size_read(bdev->bd_inode) >> 9; ++ return bdev_nr_bytes(bdev) >> SECTOR_SHIFT; + } + + static inline sector_t get_capacity(struct gendisk *disk) +-- +2.35.3 + diff --git a/patches.suse/block-add-a-get_unique_id-method.patch b/patches.suse/block-add-a-get_unique_id-method.patch new file mode 100644 index 0000000..0370a5b --- /dev/null +++ b/patches.suse/block-add-a-get_unique_id-method.patch @@ -0,0 +1,58 @@ +From: Christoph Hellwig +Date: Thu, 21 Oct 2021 08:06:01 +0200 +Subject: [PATCH] block: add a ->get_unique_id method +Git-commit: 9208d414975895f69e9aca49153060ddd31b18d0 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Add a method to query unique IDs from block devices. It will be used to +remove code that deeply pokes into SCSI internals in the NFS server. +The implementation in the sd driver itself is also much nicer as it can +use the cached VPD page instead of always sending a command as the +current NFS code does. 
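+
+A consumer might query an ID roughly as follows (illustrative sketch;
+use_id() is a made-up placeholder, not part of this patch):
+
+	u8 id[16];
+	int len;
+
+	if (disk->fops->get_unique_id) {
+		len = disk->fops->get_unique_id(disk, id, BLK_UID_NAA);
+		if (len > 0)
+			use_id(id, len);	/* first "len" bytes are valid */
+	}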
+ +For now the interface is kept very minimal but could be easily +extended when other users like a block-layer sysfs interface for +unique IDs shows up. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Hannes Reinecke +Link: https://lore.kernel.org/r/20211021060607.264371-2-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + include/linux/blkdev.h | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index f72ccb2829db..0d5826066e16 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -1158,6 +1158,14 @@ static inline void blk_crypto_unregister(struct request_queue *q) { } + + #endif /* CONFIG_BLK_INLINE_ENCRYPTION */ + ++enum blk_unique_id { ++ /* these match the Designator Types specified in SPC */ ++ BLK_UID_T10 = 1, ++ BLK_UID_EUI64 = 2, ++ BLK_UID_NAA = 3, ++}; ++ ++#define NFL4_UFLG_MASK 0x0000003F + + struct block_device_operations { + void (*submit_bio)(struct bio *bio); +@@ -1176,6 +1184,9 @@ struct block_device_operations { + int (*report_zones)(struct gendisk *, sector_t sector, + unsigned int nr_zones, report_zones_cb cb, void *data); + char *(*devnode)(struct gendisk *disk, umode_t *mode); ++ /* returns the length of the identifier or a negative errno: */ ++ int (*get_unique_id)(struct gendisk *disk, u8 id[16], ++ enum blk_unique_id id_type); + struct module *owner; + const struct pr_ops *pr_ops; + +-- +2.35.3 + diff --git a/patches.suse/block-add-a-sb_bdev_nr_blocks-helper.patch b/patches.suse/block-add-a-sb_bdev_nr_blocks-helper.patch new file mode 100644 index 0000000..68f6393 --- /dev/null +++ b/patches.suse/block-add-a-sb_bdev_nr_blocks-helper.patch @@ -0,0 +1,40 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:25 +0200 +Subject: [PATCH] block: add a sb_bdev_nr_blocks helper +Git-commit: bcc6e2cfaa48a4ad2e17719194f6d97a8e03f6c1 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Add a helper to return the size of sb->s_bdev in sb->s_blocksize_bits +based units. 
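+
+For example (illustrative only), a filesystem can size itself off its
+backing device with:
+
+	/* device size in sb->s_blocksize units */
+	u64 max_blocks = sb_bdev_nr_blocks(sb);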
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Reviewed-by: Chaitanya Kulkarni +Link: https://lore.kernel.org/r/20211018101130.1838532-26-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + include/linux/genhd.h | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/include/linux/genhd.h b/include/linux/genhd.h +index 01d27f3a970e..7b0326661a1e 100644 +--- a/include/linux/genhd.h ++++ b/include/linux/genhd.h +@@ -251,6 +251,12 @@ static inline sector_t get_capacity(struct gendisk *disk) + return bdev_nr_sectors(disk->part0); + } + ++static inline u64 sb_bdev_nr_blocks(struct super_block *sb) ++{ ++ return bdev_nr_sectors(sb->s_bdev) >> ++ (sb->s_blocksize_bits - SECTOR_SHIFT); ++} ++ + int bdev_disk_changed(struct gendisk *disk, bool invalidate); + void blk_drop_partitions(struct gendisk *disk); + +-- +2.35.3 + diff --git a/patches.suse/block-add-a-struct-io_comp_batch-argument-to-fops-io.patch b/patches.suse/block-add-a-struct-io_comp_batch-argument-to-fops-io.patch new file mode 100644 index 0000000..fbd1bfd --- /dev/null +++ b/patches.suse/block-add-a-struct-io_comp_batch-argument-to-fops-io.patch @@ -0,0 +1,347 @@ +From: Jens Axboe +Date: Tue, 12 Oct 2021 09:24:29 -0600 +Subject: [PATCH] block: add a struct io_comp_batch argument to fops->iopoll() +Git-commit: 5a72e899ceb465d731c413d57c6c12cdbf88303c +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +struct io_comp_batch contains a list head and a completion handler, which +will allow batches of IO to be completed more efficiently. + +For now there are no functional changes in this patch; we just define the +io_comp_batch structure and add the argument to the file_operations iopoll +handler. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-core.c | 9 +++++---- + block/blk-exec.c | 2 +- + block/blk-mq.c | 9 +++++---- + block/blk-mq.h | 3 ++- + block/fops.c | 4 ++-- + drivers/block/rnbd/rnbd-clt.c | 2 +- + drivers/nvme/host/pci.c | 4 ++-- + drivers/nvme/host/rdma.c | 2 +- + drivers/nvme/host/tcp.c | 2 +- + drivers/scsi/scsi_lib.c | 2 +- + fs/io_uring.c | 2 +- + fs/iomap/direct-io.c | 2 +- + include/linux/blk-mq.h | 2 +- + include/linux/blkdev.h | 13 +++++++++++-- + include/linux/fs.h | 4 +++- + mm/page_io.c | 2 +- + 16 files changed, 39 insertions(+), 25 deletions(-) + +diff --git a/block/blk-core.c b/block/blk-core.c +index 20b6cc06461a..d0c2e11411d0 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -1078,7 +1078,7 @@ EXPORT_SYMBOL(submit_bio); + * Note: the caller must either be the context that submitted @bio, or + * be in an RCU critical section to prevent freeing of @bio. + */ +-int bio_poll(struct bio *bio, unsigned int flags) ++int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) + { + struct request_queue *q = bio->bi_bdev->bd_disk->queue; + blk_qc_t cookie = READ_ONCE(bio->bi_cookie); +@@ -1096,7 +1096,7 @@ int bio_poll(struct bio *bio, unsigned int flags) + if (WARN_ON_ONCE(!queue_is_mq(q))) + ret = 0; /* not yet implemented, should not happen */ + else +- ret = blk_mq_poll(q, cookie, flags); ++ ret = blk_mq_poll(q, cookie, iob, flags); + blk_queue_exit(q); + return ret; + } +@@ -1106,7 +1106,8 @@ EXPORT_SYMBOL_GPL(bio_poll); + * Helper to implement file_operations.iopoll. Requires the bio to be stored + * in iocb->private, and cleared before freeing the bio. 
+ */ +-int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags) ++int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, ++ unsigned int flags) + { + struct bio *bio; + int ret = 0; +@@ -1134,7 +1135,7 @@ int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags) + rcu_read_lock(); + bio = READ_ONCE(kiocb->private); + if (bio && bio->bi_bdev) +- ret = bio_poll(bio, flags); ++ ret = bio_poll(bio, iob, flags); + rcu_read_unlock(); + + return ret; +diff --git a/block/blk-exec.c b/block/blk-exec.c +index 55f0cd34b37b..1b8b47f6e79b 100644 +--- a/block/blk-exec.c ++++ b/block/blk-exec.c +@@ -77,7 +77,7 @@ static bool blk_rq_is_poll(struct request *rq) + static void blk_rq_poll_completion(struct request *rq, struct completion *wait) + { + do { +- bio_poll(rq->bio, 0); ++ bio_poll(rq->bio, NULL, 0); + cond_resched(); + } while (!completion_done(wait)); + } +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 74505b545dd3..79c25b64e8b0 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -4174,14 +4174,14 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc) + } + + static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, +- unsigned int flags) ++ struct io_comp_batch *iob, unsigned int flags) + { + struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie); + long state = get_current_state(); + int ret; + + do { +- ret = q->mq_ops->poll(hctx); ++ ret = q->mq_ops->poll(hctx, iob); + if (ret > 0) { + __set_current_state(TASK_RUNNING); + return ret; +@@ -4201,14 +4201,15 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, + return 0; + } + +-int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags) ++int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob, ++ unsigned int flags) + { + if (!(flags & BLK_POLL_NOSLEEP) && + q->poll_nsec != BLK_MQ_POLL_CLASSIC) { + if (blk_mq_poll_hybrid(q, cookie)) + return 1; + } +- return blk_mq_poll_classic(q, cookie, flags); ++ return blk_mq_poll_classic(q, cookie, iob, flags); + } + + unsigned int blk_mq_rq_cpu(struct request *rq) +diff --git a/block/blk-mq.h b/block/blk-mq.h +index 1b91a3fdaa01..ebf67f4d4f2e 100644 +--- a/block/blk-mq.h ++++ b/block/blk-mq.h +@@ -31,7 +31,8 @@ struct blk_mq_ctx { + } ____cacheline_aligned_in_smp; + + void blk_mq_submit_bio(struct bio *bio); +-int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags); ++int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob, ++ unsigned int flags); + void blk_mq_exit_queue(struct request_queue *q); + int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); + void blk_mq_wake_waiters(struct request_queue *q); +diff --git a/block/fops.c b/block/fops.c +index 1d4f862950bb..2c43e493e37c 100644 +--- a/block/fops.c ++++ b/block/fops.c +@@ -105,7 +105,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, + set_current_state(TASK_UNINTERRUPTIBLE); + if (!READ_ONCE(bio.bi_private)) + break; +- if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, 0)) ++ if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0)) + blk_io_schedule(); + } + __set_current_state(TASK_RUNNING); +@@ -291,7 +291,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + if (!READ_ONCE(dio->waiter)) + break; + +- if (!do_poll || !bio_poll(bio, 0)) ++ if (!do_poll || !bio_poll(bio, NULL, 0)) + blk_io_schedule(); + } + __set_current_state(TASK_RUNNING); +diff --git a/drivers/block/rnbd/rnbd-clt.c 
b/drivers/block/rnbd/rnbd-clt.c +index bd4a41afbbfc..0ec0191d4196 100644 +--- a/drivers/block/rnbd/rnbd-clt.c ++++ b/drivers/block/rnbd/rnbd-clt.c +@@ -1176,7 +1176,7 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx, + return ret; + } + +-static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx) ++static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) + { + struct rnbd_queue *q = hctx->driver_data; + struct rnbd_clt_dev *dev = q->dev; +diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c +index 896328271471..bb0482dfab3c 100644 +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -1092,7 +1092,7 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq) + enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + } + +-static int nvme_poll(struct blk_mq_hw_ctx *hctx) ++static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) + { + struct nvme_queue *nvmeq = hctx->driver_data; + bool found; +@@ -1274,7 +1274,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) + * Did we miss an interrupt? + */ + if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) +- nvme_poll(req->mq_hctx); ++ nvme_poll(req->mq_hctx, NULL); + else + nvme_poll_irqdisable(nvmeq); + +diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c +index 40317e1b9183..1624da3702d4 100644 +--- a/drivers/nvme/host/rdma.c ++++ b/drivers/nvme/host/rdma.c +@@ -2106,7 +2106,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, + return ret; + } + +-static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx) ++static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) + { + struct nvme_rdma_queue *queue = hctx->driver_data; + +diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c +index 3c1c29dd3020..9ce3458ee1dd 100644 +--- a/drivers/nvme/host/tcp.c ++++ b/drivers/nvme/host/tcp.c +@@ -2429,7 +2429,7 @@ static int nvme_tcp_map_queues(struct blk_mq_tag_set *set) + return 0; + } + +-static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx) ++static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) + { + struct nvme_tcp_queue *queue = hctx->driver_data; + struct sock *sk = queue->sock->sk; +diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c +index 33fd9a01330c..30f7d0b4eb73 100644 +--- a/drivers/scsi/scsi_lib.c ++++ b/drivers/scsi/scsi_lib.c +@@ -1784,7 +1784,7 @@ static void scsi_mq_exit_request(struct blk_mq_tag_set *set, struct request *rq, + } + + +-static int scsi_mq_poll(struct blk_mq_hw_ctx *hctx) ++static int scsi_mq_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) + { + struct Scsi_Host *shost = hctx->driver_data; + +diff --git a/fs/io_uring.c b/fs/io_uring.c +index c5066146b8de..cd77a137f2d8 100644 +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -2483,7 +2483,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, + if (!list_empty(&done)) + break; + +- ret = kiocb->ki_filp->f_op->iopoll(kiocb, poll_flags); ++ ret = kiocb->ki_filp->f_op->iopoll(kiocb, NULL, poll_flags); + if (unlikely(ret < 0)) + return ret; + else if (ret) +diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c +index 8efab177011d..83ecfba53abe 100644 +--- a/fs/iomap/direct-io.c ++++ b/fs/iomap/direct-io.c +@@ -630,7 +630,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + break; + + if (!dio->submit.poll_bio || +- !bio_poll(dio->submit.poll_bio, 0)) ++ !bio_poll(dio->submit.poll_bio, NULL, 0)) + blk_io_schedule(); + } + 
__set_current_state(TASK_RUNNING); +diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h +index 9fb8618fb957..4c79439af2f2 100644 +--- a/include/linux/blk-mq.h ++++ b/include/linux/blk-mq.h +@@ -532,7 +532,7 @@ struct blk_mq_ops { + /** + * @poll: Called to poll for completion of a specific tag. + */ +- int (*poll)(struct blk_mq_hw_ctx *); ++ int (*poll)(struct blk_mq_hw_ctx *, struct io_comp_batch *); + + /** + * @complete: Mark the request as complete. +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index b0a322172965..fd9771a1da09 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -569,8 +569,9 @@ blk_status_t errno_to_blk_status(int errno); + #define BLK_POLL_ONESHOT (1 << 0) + /* do not sleep to wait for the expected completion time */ + #define BLK_POLL_NOSLEEP (1 << 1) +-int bio_poll(struct bio *bio, unsigned int flags); +-int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags); ++int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags); ++int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob, ++ unsigned int flags); + + static inline struct request_queue *bdev_get_queue(struct block_device *bdev) + { +@@ -1298,6 +1299,14 @@ int fsync_bdev(struct block_device *bdev); + int freeze_bdev(struct block_device *bdev); + int thaw_bdev(struct block_device *bdev); + ++struct io_comp_batch { ++ struct request *req_list; ++ bool need_ts; ++ void (*complete)(struct io_comp_batch *); ++}; ++ ++#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } ++ + #define rq_list_add(listptr, rq) do { \ + (rq)->rq_next = *(listptr); \ + *(listptr) = rq; \ +diff --git a/include/linux/fs.h b/include/linux/fs.h +index f595f4097cb7..31029a91f440 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -48,6 +48,7 @@ + struct backing_dev_info; + struct bdi_writeback; + struct bio; ++struct io_comp_batch; + struct export_operations; + struct fiemap_extent_info; + struct hd_geometry; +@@ -2071,7 +2072,8 @@ struct file_operations { + ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); +- int (*iopoll)(struct kiocb *kiocb, unsigned int flags); ++ int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *, ++ unsigned int flags); + int (*iterate) (struct file *, struct dir_context *); + int (*iterate_shared) (struct file *, struct dir_context *); + __poll_t (*poll) (struct file *, struct poll_table_struct *); +diff --git a/mm/page_io.c b/mm/page_io.c +index a68faab5b310..6010fb07f231 100644 +--- a/mm/page_io.c ++++ b/mm/page_io.c +@@ -424,7 +424,7 @@ int swap_readpage(struct page *page, bool synchronous) + if (!READ_ONCE(bio->bi_private)) + break; + +- if (!bio_poll(bio, 0)) ++ if (!bio_poll(bio, NULL, 0)) + blk_io_schedule(); + } + __set_current_state(TASK_RUNNING); +-- +2.35.3 + diff --git a/patches.suse/block-add-async-version-of-bio_set_polled.patch b/patches.suse/block-add-async-version-of-bio_set_polled.patch new file mode 100644 index 0000000..a18305f --- /dev/null +++ b/patches.suse/block-add-async-version-of-bio_set_polled.patch @@ -0,0 +1,43 @@ +From: Pavel Begunkov +Date: Wed, 27 Oct 2021 13:21:10 +0100 +Subject: [PATCH] block: add async version of bio_set_polled +Git-commit: 842e39b013465a279fb60348427b9309427a29de +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +If we know that an iocb is async, we can optimise bio_set_polled() a bit +and add a new helper 
bio_set_polled_async(). + +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/8fa137885164a5d05fadcff4c3521da8d5a83d00.1635337135.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/fops.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/block/fops.c b/block/fops.c +index 8594852bd344..a2f492e50782 100644 +--- a/block/fops.c ++++ b/block/fops.c +@@ -358,14 +358,13 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, + task_io_account_write(bio->bi_iter.bi_size); + } + +- if (iocb->ki_flags & IOCB_NOWAIT) +- bio->bi_opf |= REQ_NOWAIT; +- + if (iocb->ki_flags & IOCB_HIPRI) { +- bio_set_polled(bio, iocb); ++ bio->bi_opf |= REQ_POLLED | REQ_NOWAIT; + submit_bio(bio); + WRITE_ONCE(iocb->private, bio); + } else { ++ if (iocb->ki_flags & IOCB_NOWAIT) ++ bio->bi_opf |= REQ_NOWAIT; + submit_bio(bio); + } + return -EIOCBQUEUED; +-- +2.35.3 + diff --git a/patches.suse/block-add-rq_flags-to-struct-blk_mq_alloc_data.patch b/patches.suse/block-add-rq_flags-to-struct-blk_mq_alloc_data.patch new file mode 100644 index 0000000..ae44fc2 --- /dev/null +++ b/patches.suse/block-add-rq_flags-to-struct-blk_mq_alloc_data.patch @@ -0,0 +1,109 @@ +From: Jens Axboe +Date: Tue, 19 Oct 2021 09:32:57 -0600 +Subject: [PATCH] block: add rq_flags to struct blk_mq_alloc_data +Git-commit: 56f8da642bd827ef50a952e7bc3728c5830452be +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +There's a hole here we can use, and it's faster to set this earlier +rather than need to check q->elevator multiple times. + +Signed-off-by: Jens Axboe +Link: https://lore.kernel.org/r/20211019153300.623322-2-axboe@kernel.dk +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 20 ++++++++++---------- + block/blk-mq.h | 8 ++++---- + 2 files changed, 14 insertions(+), 14 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 9840b15f505b..a4d5b779a65a 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -321,25 +321,22 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, + struct blk_mq_ctx *ctx = data->ctx; + struct blk_mq_hw_ctx *hctx = data->hctx; + struct request_queue *q = data->q; +- struct elevator_queue *e = q->elevator; + struct blk_mq_tags *tags = blk_mq_tags_from_data(data); + struct request *rq = tags->static_rqs[tag]; +- unsigned int rq_flags = 0; + +- if (e) { +- rq_flags = RQF_ELV; +- rq->tag = BLK_MQ_NO_TAG; +- rq->internal_tag = tag; +- } else { ++ if (!(data->rq_flags & RQF_ELV)) { + rq->tag = tag; + rq->internal_tag = BLK_MQ_NO_TAG; ++ } else { ++ rq->tag = BLK_MQ_NO_TAG; ++ rq->internal_tag = tag; + } + + if (data->flags & BLK_MQ_REQ_PM) +- rq_flags |= RQF_PM; ++ data->rq_flags |= RQF_PM; + if (blk_queue_io_stat(q)) +- rq_flags |= RQF_IO_STAT; +- rq->rq_flags = rq_flags; ++ data->rq_flags |= RQF_IO_STAT; ++ rq->rq_flags = data->rq_flags; + + if (blk_mq_need_time_stamp(rq)) + rq->start_time_ns = ktime_get_ns(); +@@ -490,6 +487,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, + .q = q, + .flags = flags, + .cmd_flags = op, ++ .rq_flags = q->elevator ? RQF_ELV : 0, + .nr_tags = 1, + }; + struct request *rq; +@@ -519,6 +517,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, + .q = q, + .flags = flags, + .cmd_flags = op, ++ .rq_flags = q->elevator ? 
RQF_ELV : 0, + .nr_tags = 1, + }; + u64 alloc_time_ns = 0; +@@ -2512,6 +2511,7 @@ void blk_mq_submit_bio(struct bio *bio) + .q = q, + .nr_tags = 1, + .cmd_flags = bio->bi_opf, ++ .rq_flags = q->elevator ? RQF_ELV : 0, + }; + + if (plug) { +diff --git a/block/blk-mq.h b/block/blk-mq.h +index 08fb5922e611..28859fc5faee 100644 +--- a/block/blk-mq.h ++++ b/block/blk-mq.h +@@ -149,6 +149,7 @@ struct blk_mq_alloc_data { + blk_mq_req_flags_t flags; + unsigned int shallow_depth; + unsigned int cmd_flags; ++ unsigned int rq_flags; + + /* allocate multiple requests/tags in one go */ + unsigned int nr_tags; +@@ -166,10 +167,9 @@ static inline bool blk_mq_is_shared_tags(unsigned int flags) + + static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) + { +- if (data->q->elevator) +- return data->hctx->sched_tags; +- +- return data->hctx->tags; ++ if (!(data->rq_flags & RQF_ELV)) ++ return data->hctx->tags; ++ return data->hctx->sched_tags; + } + + static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) +-- +2.35.3 + diff --git a/patches.suse/block-add-single-bio-async-direct-IO-helper.patch b/patches.suse/block-add-single-bio-async-direct-IO-helper.patch new file mode 100644 index 0000000..e66f5bf --- /dev/null +++ b/patches.suse/block-add-single-bio-async-direct-IO-helper.patch @@ -0,0 +1,128 @@ +From: Pavel Begunkov +Date: Sat, 23 Oct 2021 17:21:32 +0100 +Subject: [PATCH] block: add single bio async direct IO helper +Git-commit: 54a88eb838d37af930c9f19e1930a4fba6789cb5 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +As with __blkdev_direct_IO_simple(), we can implement direct IO more +efficiently if there is only one bio. Add __blkdev_direct_IO_async() and +blkdev_bio_end_io_async(). This patch brings me from 4.45-4.5 MIOPS with +nullblk to 4.7+. 
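+
+The dispatch shape is easy to model in plain C. The sketch below is a
+minimal standalone illustration, not kernel code: submit_simple(),
+submit_async() and submit_general() are illustrative stand-ins for the
+three __blkdev_direct_IO*() variants, and MAX_INLINE_SEGS stands in for
+BIO_MAX_VECS.
+
+  #include <stdio.h>
+
+  #define MAX_INLINE_SEGS 4
+
+  struct io { int nr_segs; int is_sync; };
+
+  static int submit_simple(struct io *io)  { (void)io; return printf("sync single-bio path\n"); }
+  static int submit_async(struct io *io)   { (void)io; return printf("async single-bio path\n"); }
+  static int submit_general(struct io *io) { (void)io; return printf("multi-bio slow path\n"); }
+
+  /* Requests that fit in one bio take a dedicated cheap path,
+   * picked by whether the iocb is synchronous. */
+  static int submit(struct io *io)
+  {
+      if (io->nr_segs <= MAX_INLINE_SEGS)
+          return io->is_sync ? submit_simple(io) : submit_async(io);
+      return submit_general(io);
+  }
+
+  int main(void)
+  {
+      struct io a = { 2, 1 }, b = { 2, 0 }, c = { 9, 0 };
+
+      submit(&a);
+      submit(&b);
+      submit(&c);
+      return 0;
+  }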
+ +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/f0ae4109b7a6934adede490f84d188d53b97051b.1635006010.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/fops.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 84 insertions(+), 3 deletions(-) + +diff --git a/block/fops.c b/block/fops.c +index 396537598e3e..a7b328296912 100644 +--- a/block/fops.c ++++ b/block/fops.c +@@ -305,6 +305,85 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + return ret; + } + ++static void blkdev_bio_end_io_async(struct bio *bio) ++{ ++ struct blkdev_dio *dio = container_of(bio, struct blkdev_dio, bio); ++ struct kiocb *iocb = dio->iocb; ++ ssize_t ret; ++ ++ if (likely(!bio->bi_status)) { ++ ret = dio->size; ++ iocb->ki_pos += ret; ++ } else { ++ ret = blk_status_to_errno(bio->bi_status); ++ } ++ ++ iocb->ki_complete(iocb, ret, 0); ++ ++ if (dio->flags & DIO_SHOULD_DIRTY) { ++ bio_check_pages_dirty(bio); ++ } else { ++ bio_release_pages(bio, false); ++ bio_put(bio); ++ } ++} ++ ++static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, ++ struct iov_iter *iter, ++ unsigned int nr_pages) ++{ ++ struct block_device *bdev = iocb->ki_filp->private_data; ++ struct blkdev_dio *dio; ++ struct bio *bio; ++ loff_t pos = iocb->ki_pos; ++ int ret = 0; ++ ++ if ((pos | iov_iter_alignment(iter)) & ++ (bdev_logical_block_size(bdev) - 1)) ++ return -EINVAL; ++ ++ bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool); ++ dio = container_of(bio, struct blkdev_dio, bio); ++ dio->flags = 0; ++ dio->iocb = iocb; ++ bio_set_dev(bio, bdev); ++ bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT; ++ bio->bi_write_hint = iocb->ki_hint; ++ bio->bi_end_io = blkdev_bio_end_io_async; ++ bio->bi_ioprio = iocb->ki_ioprio; ++ ++ ret = bio_iov_iter_get_pages(bio, iter); ++ if (unlikely(ret)) { ++ bio->bi_status = BLK_STS_IOERR; ++ bio_endio(bio); ++ return ret; ++ } ++ dio->size = bio->bi_iter.bi_size; ++ ++ if (iov_iter_rw(iter) == READ) { ++ bio->bi_opf = REQ_OP_READ; ++ if (iter_is_iovec(iter)) { ++ dio->flags |= DIO_SHOULD_DIRTY; ++ bio_set_pages_dirty(bio); ++ } ++ } else { ++ bio->bi_opf = dio_bio_write_op(iocb); ++ task_io_account_write(bio->bi_iter.bi_size); ++ } ++ ++ if (iocb->ki_flags & IOCB_NOWAIT) ++ bio->bi_opf |= REQ_NOWAIT; ++ ++ if (iocb->ki_flags & IOCB_HIPRI) { ++ bio_set_polled(bio, iocb); ++ submit_bio(bio); ++ WRITE_ONCE(iocb->private, bio); ++ } else { ++ submit_bio(bio); ++ } ++ return -EIOCBQUEUED; ++} ++ + static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) + { + unsigned int nr_pages; +@@ -313,9 +392,11 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) + return 0; + + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1); +- if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS) +- return __blkdev_direct_IO_simple(iocb, iter, nr_pages); +- ++ if (likely(nr_pages <= BIO_MAX_VECS)) { ++ if (is_sync_kiocb(iocb)) ++ return __blkdev_direct_IO_simple(iocb, iter, nr_pages); ++ return __blkdev_direct_IO_async(iocb, iter, nr_pages); ++ } + return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages)); + } + +-- +2.35.3 + diff --git a/patches.suse/block-add-support-for-blk_mq_end_request_batch.patch b/patches.suse/block-add-support-for-blk_mq_end_request_batch.patch new file mode 100644 index 0000000..38ed51e --- /dev/null +++ b/patches.suse/block-add-support-for-blk_mq_end_request_batch.patch @@ -0,0 +1,203 @@ +From: Jens Axboe +Date: Fri, 8 Oct 2021 05:50:46 
-0600 +Subject: [PATCH] block: add support for blk_mq_end_request_batch() +Git-commit: f794f3351f2672d782b8df0fa59f3cef38cffa59 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Instead of calling blk_mq_end_request() on a single request, add a helper +that takes the new struct io_comp_batch and completes any request stored +in there. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq-tag.c | 6 ++++ + block/blk-mq-tag.h | 1 + + block/blk-mq.c | 82 ++++++++++++++++++++++++++++++++---------- + include/linux/blk-mq.h | 29 +++++++++++++++ + 4 files changed, 99 insertions(+), 19 deletions(-) + +diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c +index c43b97201161..b94c3e8ef392 100644 +--- a/block/blk-mq-tag.c ++++ b/block/blk-mq-tag.c +@@ -207,6 +207,12 @@ void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, + } + } + ++void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags) ++{ ++ sbitmap_queue_clear_batch(&tags->bitmap_tags, tags->nr_reserved_tags, ++ tag_array, nr_tags); ++} ++ + struct bt_iter_data { + struct blk_mq_hw_ctx *hctx; + busy_iter_fn *fn; +diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h +index 71c2f7d8e9b7..78ae2fb8e2a4 100644 +--- a/block/blk-mq-tag.h ++++ b/block/blk-mq-tag.h +@@ -42,6 +42,7 @@ unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags, + unsigned int *offset); + extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, + unsigned int tag); ++void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags); + extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, + struct blk_mq_tags **tags, + unsigned int depth, bool can_grow); +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 79c25b64e8b0..9248edd8a7d3 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -300,15 +300,6 @@ void blk_mq_wake_waiters(struct request_queue *q) + blk_mq_tag_wakeup_all(hctx->tags, true); + } + +-/* +- * Only need start/end time stamping if we have iostat or +- * blk stats enabled, or using an IO scheduler. 
+- */ +-static inline bool blk_mq_need_time_stamp(struct request *rq) +-{ +- return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV)); +-} +- + static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, + unsigned int tag, u64 alloc_time_ns) + { +@@ -768,19 +759,21 @@ bool blk_update_request(struct request *req, blk_status_t error, + } + EXPORT_SYMBOL_GPL(blk_update_request); + +-inline void __blk_mq_end_request(struct request *rq, blk_status_t error) ++static inline void __blk_mq_end_request_acct(struct request *rq, u64 now) + { +- if (blk_mq_need_time_stamp(rq)) { +- u64 now = ktime_get_ns(); ++ if (rq->rq_flags & RQF_STATS) { ++ blk_mq_poll_stats_start(rq->q); ++ blk_stat_add(rq, now); ++ } + +- if (rq->rq_flags & RQF_STATS) { +- blk_mq_poll_stats_start(rq->q); +- blk_stat_add(rq, now); +- } ++ blk_mq_sched_completed_request(rq, now); ++ blk_account_io_done(rq, now); ++} + +- blk_mq_sched_completed_request(rq, now); +- blk_account_io_done(rq, now); +- } ++inline void __blk_mq_end_request(struct request *rq, blk_status_t error) ++{ ++ if (blk_mq_need_time_stamp(rq)) ++ __blk_mq_end_request_acct(rq, ktime_get_ns()); + + if (rq->end_io) { + rq_qos_done(rq->q, rq); +@@ -799,6 +792,57 @@ void blk_mq_end_request(struct request *rq, blk_status_t error) + } + EXPORT_SYMBOL(blk_mq_end_request); + ++#define TAG_COMP_BATCH 32 ++ ++static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx, ++ int *tag_array, int nr_tags) ++{ ++ struct request_queue *q = hctx->queue; ++ ++ blk_mq_put_tags(hctx->tags, tag_array, nr_tags); ++ percpu_ref_put_many(&q->q_usage_counter, nr_tags); ++} ++ ++void blk_mq_end_request_batch(struct io_comp_batch *iob) ++{ ++ int tags[TAG_COMP_BATCH], nr_tags = 0; ++ struct blk_mq_hw_ctx *last_hctx = NULL; ++ struct request *rq; ++ u64 now = 0; ++ ++ if (iob->need_ts) ++ now = ktime_get_ns(); ++ ++ while ((rq = rq_list_pop(&iob->req_list)) != NULL) { ++ prefetch(rq->bio); ++ prefetch(rq->rq_next); ++ ++ blk_update_request(rq, BLK_STS_OK, blk_rq_bytes(rq)); ++ if (iob->need_ts) ++ __blk_mq_end_request_acct(rq, now); ++ ++ WRITE_ONCE(rq->state, MQ_RQ_IDLE); ++ if (!refcount_dec_and_test(&rq->ref)) ++ continue; ++ ++ blk_crypto_free_request(rq); ++ blk_pm_mark_last_busy(rq); ++ rq_qos_done(rq->q, rq); ++ ++ if (nr_tags == TAG_COMP_BATCH || ++ (last_hctx && last_hctx != rq->mq_hctx)) { ++ blk_mq_flush_tag_batch(last_hctx, tags, nr_tags); ++ nr_tags = 0; ++ } ++ tags[nr_tags++] = rq->tag; ++ last_hctx = rq->mq_hctx; ++ } ++ ++ if (nr_tags) ++ blk_mq_flush_tag_batch(last_hctx, tags, nr_tags); ++} ++EXPORT_SYMBOL_GPL(blk_mq_end_request_batch); ++ + static void blk_complete_reqs(struct llist_head *list) + { + struct llist_node *entry = llist_reverse_order(llist_del_all(list)); +diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h +index 4c79439af2f2..656fe34bdb6c 100644 +--- a/include/linux/blk-mq.h ++++ b/include/linux/blk-mq.h +@@ -728,6 +728,35 @@ static inline void blk_mq_set_request_complete(struct request *rq) + void blk_mq_start_request(struct request *rq); + void blk_mq_end_request(struct request *rq, blk_status_t error); + void __blk_mq_end_request(struct request *rq, blk_status_t error); ++void blk_mq_end_request_batch(struct io_comp_batch *ib); ++ ++/* ++ * Only need start/end time stamping if we have iostat or ++ * blk stats enabled, or using an IO scheduler. 
++ */ ++static inline bool blk_mq_need_time_stamp(struct request *rq) ++{ ++ return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV)); ++} ++ ++/* ++ * Batched completions only work when there is no I/O error and no special ++ * ->end_io handler. ++ */ ++static inline bool blk_mq_add_to_batch(struct request *req, ++ struct io_comp_batch *iob, int ioerror, ++ void (*complete)(struct io_comp_batch *)) ++{ ++ if (!iob || (req->rq_flags & RQF_ELV) || req->end_io || ioerror) ++ return false; ++ if (!iob->complete) ++ iob->complete = complete; ++ else if (iob->complete != complete) ++ return false; ++ iob->need_ts |= blk_mq_need_time_stamp(req); ++ rq_list_add(&iob->req_list, req); ++ return true; ++} + + void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); + void blk_mq_kick_requeue_list(struct request_queue *q); +-- +2.35.3 + diff --git a/patches.suse/block-align-blkdev_dio-inlined-bio-to-a-cacheline.patch b/patches.suse/block-align-blkdev_dio-inlined-bio-to-a-cacheline.patch new file mode 100644 index 0000000..0031f92 --- /dev/null +++ b/patches.suse/block-align-blkdev_dio-inlined-bio-to-a-cacheline.patch @@ -0,0 +1,34 @@ +From: Jens Axboe +Date: Fri, 15 Oct 2021 16:55:05 -0600 +Subject: [PATCH] block: align blkdev_dio inlined bio to a cacheline +Git-commit: 6155631a0c3b7ca795a0edb44894e58dc3e3d798 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We get all sorts of unreliable and funky results since the bio is +designed to align on a cacheline, which it does not when inlined like +this. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/fops.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/fops.c b/block/fops.c +index 2c43e493e37c..21d25ee0e4bf 100644 +--- a/block/fops.c ++++ b/block/fops.c +@@ -137,7 +137,7 @@ struct blkdev_dio { + size_t size; + atomic_t ref; + unsigned int flags; +- struct bio bio; ++ struct bio bio ____cacheline_aligned_in_smp; + }; + + static struct bio_set blkdev_dio_pool; +-- +2.35.3 + diff --git a/patches.suse/block-ataflop-Fix-warning-comparing-pointer-to-0.patch b/patches.suse/block-ataflop-Fix-warning-comparing-pointer-to-0.patch new file mode 100644 index 0000000..7240d53 --- /dev/null +++ b/patches.suse/block-ataflop-Fix-warning-comparing-pointer-to-0.patch @@ -0,0 +1,37 @@ +From: Jiapeng Chong +Date: Fri, 29 Oct 2021 17:50:29 +0800 +Subject: [PATCH] block: ataflop: Fix warning comparing pointer to 0 +Git-commit: df75db1fc1e5608271397de37cab43371bb838d2 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Fix the following coccicheck warning: + +./drivers/block/ataflop.c:1464:20-21: WARNING comparing pointer to 0. 
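+
+The warning class is easy to reproduce in a standalone C sketch (not
+kernel code): comparing a pointer against the literal 0 is legal but
+reads like an integer comparison, which is why coccicheck flags it.
+
+  #include <stdio.h>
+
+  struct disk_type { int sectors; };
+
+  int main(void)
+  {
+      struct disk_type *disktype = NULL;
+
+      if (disktype == 0)      /* legal, but what coccicheck flags */
+          printf("pointer compared to 0\n");
+      if (!disktype)          /* the idiomatic replacement */
+          printf("idiomatic NULL test\n");
+      return 0;
+  }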
+ +Reported-by: Abaci Robot +Signed-off-by: Jiapeng Chong +Link: https://lore.kernel.org/r/1635501029-81391-1-git-send-email-jiapeng.chong@linux.alibaba.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/ataflop.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c +index 2622803ef71a..d14bdc3589b2 100644 +--- a/drivers/block/ataflop.c ++++ b/drivers/block/ataflop.c +@@ -1460,8 +1460,7 @@ static int floppy_revalidate(struct gendisk *disk) + unsigned int drive = p - unit; + + if (test_bit(drive, &changed_floppies) || +- test_bit(drive, &fake_change) || +- p->disktype == 0) { ++ test_bit(drive, &fake_change) || !p->disktype) { + if (UD.flags & FTD_MSG) + printk(KERN_ERR "floppy: clear format %p!\n", UDT); + BufferDrive = -1; +-- +2.35.3 + diff --git a/patches.suse/block-ataflop-add-error-handling-support-for-add_dis.patch b/patches.suse/block-ataflop-add-error-handling-support-for-add_dis.patch new file mode 100644 index 0000000..004da5e --- /dev/null +++ b/patches.suse/block-ataflop-add-error-handling-support-for-add_dis.patch @@ -0,0 +1,46 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:03:02 -0700 +Subject: [PATCH] block/ataflop: add error handling support for add_disk() +Git-commit: 2f1510708970c873c5eee190b77071f59f67cef8 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. + +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20210927220302.1073499-15-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/ataflop.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c +index a07cb9b79a6c..9bc5cce6b29a 100644 +--- a/drivers/block/ataflop.c ++++ b/drivers/block/ataflop.c +@@ -2082,7 +2082,9 @@ static int __init atari_floppy_init (void) + for (i = 0; i < FD_MAX_UNITS; i++) { + unit[i].track = -1; + unit[i].flags = 0; +- add_disk(unit[i].disk[0]); ++ ret = add_disk(unit[i].disk[0]); ++ if (ret) ++ goto err_out_dma; + unit[i].registered[0] = true; + } + +@@ -2093,6 +2095,8 @@ static int __init atari_floppy_init (void) + + return 0; + ++err_out_dma: ++ atari_stram_free(DMABuffer); + err: + while (--i >= 0) + atari_cleanup_floppy_disk(&unit[i]); +-- +2.35.3 + diff --git a/patches.suse/block-attempt-direct-issue-of-plug-list.patch b/patches.suse/block-attempt-direct-issue-of-plug-list.patch new file mode 100644 index 0000000..06b0452 --- /dev/null +++ b/patches.suse/block-attempt-direct-issue-of-plug-list.patch @@ -0,0 +1,133 @@ +From: Jens Axboe +Date: Tue, 19 Oct 2021 06:02:30 -0600 +Subject: [PATCH] block: attempt direct issue of plug list +Git-commit: dc5fc361d891e089dfd9c0a975dc78041036b906 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +If we have just one queue type in the plug list, then we can extend our +direct issue to cover a full plug list as well. This allows sending a +batch of requests for direct issue, which is more efficient than doing +one-at-a-time kind of issue. 
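+
+The issue loop is the interesting part; below is a minimal userspace
+model of it (illustrative only: pop(), issue() and commit() stand in
+for rq_list_pop(), blk_mq_request_issue_directly() and the driver's
+->commit_rqs() hook).
+
+  #include <stdio.h>
+  #include <stddef.h>
+
+  struct req { int id; struct req *next; };
+
+  static struct req *pop(struct req **list)
+  {
+      struct req *r = *list;
+
+      if (r)
+          *list = r->next;
+      return r;
+  }
+
+  static void issue(struct req *r, int last)
+  {
+      printf("issue %d%s\n", r->id, last ? " (last)" : "");
+  }
+
+  static void commit(int *queued)
+  {
+      if (*queued)
+          printf("commit batch of %d\n", *queued);
+      *queued = 0;
+  }
+
+  /* Drain the plug list, telling the driver which request is the
+   * last so it can ring the doorbell once per batch. */
+  static void flush_plug(struct req **list)
+  {
+      int queued = 0;
+      struct req *r;
+
+      while ((r = pop(list))) {
+          issue(r, *list == NULL);
+          queued++;
+      }
+      commit(&queued);
+  }
+
+  int main(void)
+  {
+      struct req c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
+      struct req *plug = &a;
+
+      flush_plug(&plug);
+      return 0;
+  }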
+ +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-core.c | 1 + + block/blk-mq.c | 60 ++++++++++++++++++++++++++++++++++++++++++ + include/linux/blkdev.h | 1 + + 3 files changed, 62 insertions(+) + +diff --git a/block/blk-core.c b/block/blk-core.c +index 14d20909f61a..e6ad5b51d0c3 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -1555,6 +1555,7 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios) + plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT); + plug->rq_count = 0; + plug->multiple_queues = false; ++ plug->has_elevator = false; + plug->nowait = false; + INIT_LIST_HEAD(&plug->cb_list); + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 7fa302730d4a..71ab7521dd3d 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2149,6 +2149,58 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, + spin_unlock(&ctx->lock); + } + ++static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int *queued, ++ bool from_schedule) ++{ ++ if (hctx->queue->mq_ops->commit_rqs) { ++ trace_block_unplug(hctx->queue, *queued, !from_schedule); ++ hctx->queue->mq_ops->commit_rqs(hctx); ++ } ++ *queued = 0; ++} ++ ++static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule) ++{ ++ struct blk_mq_hw_ctx *hctx = NULL; ++ struct request *rq; ++ int queued = 0; ++ int errors = 0; ++ ++ while ((rq = rq_list_pop(&plug->mq_list))) { ++ bool last = rq_list_empty(plug->mq_list); ++ blk_status_t ret; ++ ++ if (hctx != rq->mq_hctx) { ++ if (hctx) ++ blk_mq_commit_rqs(hctx, &queued, from_schedule); ++ hctx = rq->mq_hctx; ++ } ++ ++ ret = blk_mq_request_issue_directly(rq, last); ++ switch (ret) { ++ case BLK_STS_OK: ++ queued++; ++ break; ++ case BLK_STS_RESOURCE: ++ case BLK_STS_DEV_RESOURCE: ++ blk_mq_request_bypass_insert(rq, false, last); ++ blk_mq_commit_rqs(hctx, &queued, from_schedule); ++ return; ++ default: ++ blk_mq_end_request(rq, ret); ++ errors++; ++ break; ++ } ++ } ++ ++ /* ++ * If we didn't flush the entire list, we could have told the driver ++ * there was more coming, but that turned out to be a lie. 
++ */
++ if (errors)
++ blk_mq_commit_rqs(hctx, &queued, from_schedule);
++}
++
+ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
+ {
+ struct blk_mq_hw_ctx *this_hctx;
+@@ -2160,6 +2212,12 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
+ return;
+ plug->rq_count = 0;
+
++ if (!plug->multiple_queues && !plug->has_elevator) {
++ blk_mq_plug_issue_direct(plug, from_schedule);
++ if (rq_list_empty(plug->mq_list))
++ return;
++ }
++
+ this_hctx = NULL;
+ this_ctx = NULL;
+ depth = 0;
+@@ -2376,6 +2434,8 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
+ if (nxt && nxt->q != rq->q)
+ plug->multiple_queues = true;
+ }
++ if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
++ plug->has_elevator = true;
+ rq->rq_next = NULL;
+ rq_list_add(&plug->mq_list, rq);
+ plug->rq_count++;
+diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
+index 4027112b9851..f13091d3d476 100644
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -737,6 +737,7 @@ struct blk_plug {
+ unsigned short rq_count;
+
+ bool multiple_queues;
++ bool has_elevator;
+ bool nowait;
+
+ struct list_head cb_list; /* md requires an unplug callback */
+--
+2.35.3
+
diff --git a/patches.suse/block-avoid-extra-iter-advance-with-async-iocb.patch b/patches.suse/block-avoid-extra-iter-advance-with-async-iocb.patch
new file mode 100644
index 0000000..bf8af23
--- /dev/null
+++ b/patches.suse/block-avoid-extra-iter-advance-with-async-iocb.patch
@@ -0,0 +1,81 @@
+From: Pavel Begunkov
+Date: Wed, 27 Oct 2021 13:21:07 +0100
+Subject: [PATCH] block: avoid extra iter advance with async iocb
+Git-commit: 1bb6b81029456f4e2e6727c5167f43bdfc34bee5
+Patch-mainline: v5.16-rc1
+References: jsc#PED-1183
+
+Nobody cares about the iov iterator's state if we return -EIOCBQUEUED,
+so as we now have __blkdev_direct_IO_async(), which gets pages only once,
+we can skip expensive iov_iter_advance(). It's around 1-2% of all CPU
+spent.
+
+Signed-off-by: Pavel Begunkov
+Link: https://lore.kernel.org/r/a6158edfbfa2ae3bc24aed29a72f035df18fad2f.1635337135.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe
+Acked-by: Hannes Reinecke
+---
+ block/bio.c | 2 +-
+ block/fops.c | 20 +++++++++++++++-----
+ include/linux/bio.h | 1 +
+ 3 files changed, 17 insertions(+), 6 deletions(-)
+
+diff --git a/block/bio.c b/block/bio.c
+index ead1f8a9ff5e..15ab0d6d1c06 100644
+--- a/block/bio.c
++++ b/block/bio.c
+@@ -1046,7 +1046,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
+ }
+ EXPORT_SYMBOL_GPL(__bio_release_pages);
+
+-static void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
++void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
+ {
+ size_t size = iov_iter_count(iter);
+
+diff --git a/block/fops.c b/block/fops.c
+index a7b328296912..092e5079e827 100644
+--- a/block/fops.c
++++ b/block/fops.c
+@@ -352,11 +352,21 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
+ bio->bi_end_io = blkdev_bio_end_io_async;
+ bio->bi_ioprio = iocb->ki_ioprio;
+
+- ret = bio_iov_iter_get_pages(bio, iter);
+- if (unlikely(ret)) {
+- bio->bi_status = BLK_STS_IOERR;
+- bio_endio(bio);
+- return ret;
++ if (iov_iter_is_bvec(iter)) {
++ /*
++ * Users don't rely on the iterator being in any particular
++ * state for async I/O returning -EIOCBQUEUED, hence we can
++ * avoid expensive iov_iter_advance(). Bypass
++ * bio_iov_iter_get_pages() and set the bvec directly.
++ */ ++ bio_iov_bvec_set(bio, iter); ++ } else { ++ ret = bio_iov_iter_get_pages(bio, iter); ++ if (unlikely(ret)) { ++ bio->bi_status = BLK_STS_IOERR; ++ bio_endio(bio); ++ return ret; ++ } + } + dio->size = bio->bi_iter.bi_size; + +diff --git a/include/linux/bio.h b/include/linux/bio.h +index c88700d1bdc3..fe6bdfbbef66 100644 +--- a/include/linux/bio.h ++++ b/include/linux/bio.h +@@ -417,6 +417,7 @@ int bio_add_zone_append_page(struct bio *bio, struct page *page, + void __bio_add_page(struct bio *bio, struct page *page, + unsigned int len, unsigned int off); + int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); ++void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter); + void __bio_release_pages(struct bio *bio, bool mark_dirty); + extern void bio_set_pages_dirty(struct bio *bio); + extern void bio_check_pages_dirty(struct bio *bio); +-- +2.35.3 + diff --git a/patches.suse/block-blk_mq_rq_ctx_init-cache-ctx-q-hctx.patch b/patches.suse/block-blk_mq_rq_ctx_init-cache-ctx-q-hctx.patch new file mode 100644 index 0000000..76c08c5 --- /dev/null +++ b/patches.suse/block-blk_mq_rq_ctx_init-cache-ctx-q-hctx.patch @@ -0,0 +1,61 @@ +From: Pavel Begunkov +Date: Mon, 18 Oct 2021 21:37:28 +0100 +Subject: [PATCH] block: blk_mq_rq_ctx_init cache ctx/q/hctx +Git-commit: 605f784e4f5faecf6c78070c6bf446e920104f9f +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We should have enough of registers in blk_mq_rq_ctx_init(), store them +in local vars, so we don't keep reloading them. + +note: keeping q->elevator may look unnecessary, but it's also used +inside inlined blk_mq_tags_from_data(). + +Signed-off-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 1d2e2fd4043e..fa4de25c3bcb 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -312,10 +312,14 @@ static inline bool blk_mq_need_time_stamp(struct request *rq) + static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, + unsigned int tag, u64 alloc_time_ns) + { ++ struct blk_mq_ctx *ctx = data->ctx; ++ struct blk_mq_hw_ctx *hctx = data->hctx; ++ struct request_queue *q = data->q; ++ struct elevator_queue *e = q->elevator; + struct blk_mq_tags *tags = blk_mq_tags_from_data(data); + struct request *rq = tags->static_rqs[tag]; + +- if (data->q->elevator) { ++ if (e) { + rq->rq_flags = RQF_ELV; + rq->tag = BLK_MQ_NO_TAG; + rq->internal_tag = tag; +@@ -330,13 +334,13 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, + else + rq->start_time_ns = 0; + /* csd/requeue_work/fifo_time is initialized before use */ +- rq->q = data->q; +- rq->mq_ctx = data->ctx; +- rq->mq_hctx = data->hctx; ++ rq->q = q; ++ rq->mq_ctx = ctx; ++ rq->mq_hctx = hctx; + rq->cmd_flags = data->cmd_flags; + if (data->flags & BLK_MQ_REQ_PM) + rq->rq_flags |= RQF_PM; +- if (blk_queue_io_stat(data->q)) ++ if (blk_queue_io_stat(q)) + rq->rq_flags |= RQF_IO_STAT; + rq->rq_disk = NULL; + rq->part = NULL; +-- +2.35.3 + diff --git a/patches.suse/block-cache-bdev-in-struct-file-for-raw-bdev-IO.patch b/patches.suse/block-cache-bdev-in-struct-file-for-raw-bdev-IO.patch new file mode 100644 index 0000000..bfb2a88 --- /dev/null +++ b/patches.suse/block-cache-bdev-in-struct-file-for-raw-bdev-IO.patch @@ -0,0 +1,118 @@ +From: Pavel Begunkov +Date: Wed, 13 Oct 2021 09:57:11 +0100 +Subject: [PATCH] block: cache bdev in struct file for raw bdev IO +Git-commit: 
fac7c6d529acf2b5428ad08c1b1127e29e570790 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +bdev = &BDEV_I(file->f_mapping->host)->bdev + +Getting struct block_device from a file requires 2 memory dereferences +as illustrated above, that takes a toll on performance, so cache it in +yet unused file->private_data. That gives a noticeable peak performance +improvement. + +Signed-off-by: Pavel Begunkov +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/8415f9fe12e544b9da89593dfbca8de2b52efe03.1634115360.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/fops.c | 27 ++++++++++++--------------- + 1 file changed, 12 insertions(+), 15 deletions(-) + +diff --git a/block/fops.c b/block/fops.c +index ce1255529ba2..551b71af6d90 100644 +--- a/block/fops.c ++++ b/block/fops.c +@@ -17,7 +17,7 @@ + #include + #include "blk.h" + +-static struct inode *bdev_file_inode(struct file *file) ++static inline struct inode *bdev_file_inode(struct file *file) + { + return file->f_mapping->host; + } +@@ -54,8 +54,7 @@ static void blkdev_bio_end_io_simple(struct bio *bio) + static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, + struct iov_iter *iter, unsigned int nr_pages) + { +- struct file *file = iocb->ki_filp; +- struct block_device *bdev = I_BDEV(bdev_file_inode(file)); ++ struct block_device *bdev = iocb->ki_filp->private_data; + struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs; + loff_t pos = iocb->ki_pos; + bool should_dirty = false; +@@ -183,9 +182,7 @@ static void blkdev_bio_end_io(struct bio *bio) + static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + unsigned int nr_pages) + { +- struct file *file = iocb->ki_filp; +- struct inode *inode = bdev_file_inode(file); +- struct block_device *bdev = I_BDEV(inode); ++ struct block_device *bdev = iocb->ki_filp->private_data; + struct blk_plug plug; + struct blkdev_dio *dio; + struct bio *bio; +@@ -389,8 +386,7 @@ static loff_t blkdev_llseek(struct file *file, loff_t offset, int whence) + static int blkdev_fsync(struct file *filp, loff_t start, loff_t end, + int datasync) + { +- struct inode *bd_inode = bdev_file_inode(filp); +- struct block_device *bdev = I_BDEV(bd_inode); ++ struct block_device *bdev = filp->private_data; + int error; + + error = file_write_and_wait_range(filp, start, end); +@@ -432,6 +428,8 @@ static int blkdev_open(struct inode *inode, struct file *filp) + bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); ++ ++ filp->private_data = bdev; + filp->f_mapping = bdev->bd_inode->i_mapping; + filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); + return 0; +@@ -439,7 +437,7 @@ static int blkdev_open(struct inode *inode, struct file *filp) + + static int blkdev_close(struct inode *inode, struct file *filp) + { +- struct block_device *bdev = I_BDEV(bdev_file_inode(filp)); ++ struct block_device *bdev = filp->private_data; + + blkdev_put(bdev, filp->f_mode); + return 0; +@@ -454,14 +452,14 @@ static int blkdev_close(struct inode *inode, struct file *filp) + */ + static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) + { +- struct file *file = iocb->ki_filp; +- struct inode *bd_inode = bdev_file_inode(file); ++ struct block_device *bdev = iocb->ki_filp->private_data; ++ struct inode *bd_inode = bdev->bd_inode; + loff_t size = i_size_read(bd_inode); + struct blk_plug plug; + size_t shorted = 0; + ssize_t ret; + +- if (bdev_read_only(I_BDEV(bd_inode))) ++ if (bdev_read_only(bdev)) 
+ return -EPERM; + + if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev)) +@@ -493,9 +491,8 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) + + static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) + { +- struct file *file = iocb->ki_filp; +- struct inode *bd_inode = bdev_file_inode(file); +- loff_t size = i_size_read(bd_inode); ++ struct block_device *bdev = iocb->ki_filp->private_data; ++ loff_t size = i_size_read(bdev->bd_inode); + loff_t pos = iocb->ki_pos; + size_t shorted = 0; + ssize_t ret; +-- +2.35.3 + diff --git a/patches.suse/block-cache-inode-size-in-bdev.patch b/patches.suse/block-cache-inode-size-in-bdev.patch new file mode 100644 index 0000000..3a96612 --- /dev/null +++ b/patches.suse/block-cache-inode-size-in-bdev.patch @@ -0,0 +1,85 @@ +From: Jens Axboe +Date: Mon, 18 Oct 2021 11:39:45 -0600 +Subject: [PATCH] block: cache inode size in bdev +Git-commit: f09313c57a17683cbcb305989daf1d94b49fd32c +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Reading the inode size brings in a new cacheline for IO submit, and +it's in the hot path being checked for every single IO. When doing +millions of IOs per core per second, this is noticeable overhead. + +Cache the nr_sectors in the bdev itself. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/genhd.c | 1 + + block/partitions/core.c | 1 + + include/linux/blk_types.h | 1 + + include/linux/genhd.h | 8 ++++---- + 4 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/block/genhd.c b/block/genhd.c +index 759bc06810f8..53495e3391e3 100644 +--- a/block/genhd.c ++++ b/block/genhd.c +@@ -58,6 +58,7 @@ void set_capacity(struct gendisk *disk, sector_t sectors) + + spin_lock(&bdev->bd_size_lock); + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); ++ bdev->bd_nr_sectors = sectors; + spin_unlock(&bdev->bd_size_lock); + } + EXPORT_SYMBOL(set_capacity); +diff --git a/block/partitions/core.c b/block/partitions/core.c +index 9dbddc355b40..66ef9bc6d6a1 100644 +--- a/block/partitions/core.c ++++ b/block/partitions/core.c +@@ -91,6 +91,7 @@ static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) + { + spin_lock(&bdev->bd_size_lock); + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); ++ bdev->bd_nr_sectors = sectors; + spin_unlock(&bdev->bd_size_lock); + } + +diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h +index 472e55e0e94f..fe065c394fff 100644 +--- a/include/linux/blk_types.h ++++ b/include/linux/blk_types.h +@@ -39,6 +39,7 @@ struct bio_crypt_ctx; + + struct block_device { + sector_t bd_start_sect; ++ sector_t bd_nr_sectors; + struct disk_stats __percpu *bd_stats; + unsigned long bd_stamp; + bool bd_read_only; /* read-only policy */ +diff --git a/include/linux/genhd.h b/include/linux/genhd.h +index 7b0326661a1e..900325aa5d31 100644 +--- a/include/linux/genhd.h ++++ b/include/linux/genhd.h +@@ -236,14 +236,14 @@ static inline sector_t get_start_sect(struct block_device *bdev) + return bdev->bd_start_sect; + } + +-static inline loff_t bdev_nr_bytes(struct block_device *bdev) ++static inline sector_t bdev_nr_sectors(struct block_device *bdev) + { +- return i_size_read(bdev->bd_inode); ++ return bdev->bd_nr_sectors; + } + +-static inline sector_t bdev_nr_sectors(struct block_device *bdev) ++static inline loff_t bdev_nr_bytes(struct block_device *bdev) + { +- return bdev_nr_bytes(bdev) >> SECTOR_SHIFT; ++ return bdev_nr_sectors(bdev) << SECTOR_SHIFT; + 
} + + static inline sector_t get_capacity(struct gendisk *disk) +-- +2.35.3 + diff --git a/patches.suse/block-cache-request-queue-in-bdev.patch b/patches.suse/block-cache-request-queue-in-bdev.patch new file mode 100644 index 0000000..a759440 --- /dev/null +++ b/patches.suse/block-cache-request-queue-in-bdev.patch @@ -0,0 +1,88 @@ +From: Pavel Begunkov +Date: Thu, 14 Oct 2021 15:03:26 +0100 +Subject: [PATCH] block: cache request queue in bdev +Git-commit: 17220ca5ce9606c1b015c4316fca18734c2df0bb +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +There are tons of places where we need to get a request_queue only +having bdev, which turns into bdev->bd_disk->queue. There are probably a +hundred of such places considering inline helpers, and enough of them +are in hot paths. + +Cache queue pointer in struct block_device and make use of it in +bdev_get_queue(). + +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/a3bfaecdd28956f03629d0ca5c63ebc096e1c809.1634219547.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/bdev.c | 1 + + block/genhd.c | 4 +++- + include/linux/blk_types.h | 1 + + include/linux/blkdev.h | 2 +- + 4 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/block/bdev.c b/block/bdev.c +index 93b1188d7e58..fed8d0c041c7 100644 +--- a/block/bdev.c ++++ b/block/bdev.c +@@ -493,6 +493,7 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) + spin_lock_init(&bdev->bd_size_lock); + bdev->bd_partno = partno; + bdev->bd_inode = inode; ++ bdev->bd_queue = disk->queue; + bdev->bd_stats = alloc_percpu(struct disk_stats); + if (!bdev->bd_stats) { + iput(inode); +diff --git a/block/genhd.c b/block/genhd.c +index ffbdb9b24555..d148c38450d7 100644 +--- a/block/genhd.c ++++ b/block/genhd.c +@@ -1267,6 +1267,9 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, + if (!disk->bdi) + goto out_free_disk; + ++ /* bdev_alloc() might need the queue, set before the first call */ ++ disk->queue = q; ++ + disk->part0 = bdev_alloc(disk, 0); + if (!disk->part0) + goto out_free_bdi; +@@ -1282,7 +1285,6 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, + disk_to_dev(disk)->type = &disk_type; + device_initialize(disk_to_dev(disk)); + inc_diskseq(disk); +- disk->queue = q; + q->disk = disk; + lockdep_init_map(&disk->lockdep_map, "(bio completion)", lkclass, 0); + #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED +diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h +index 72736b4c057c..1e370929c89e 100644 +--- a/include/linux/blk_types.h ++++ b/include/linux/blk_types.h +@@ -38,6 +38,7 @@ struct block_device { + u8 bd_partno; + spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ + struct gendisk * bd_disk; ++ struct request_queue * bd_queue; + + /* The counter of freeze processes */ + int bd_fsfreeze_count; +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index 2a8689e949b4..d5b21fc8f49e 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -574,7 +574,7 @@ int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags); + + static inline struct request_queue *bdev_get_queue(struct block_device *bdev) + { +- return bdev->bd_disk->queue; /* this is never NULL */ ++ return bdev->bd_queue; /* this is never NULL */ + } + + /* +-- +2.35.3 + diff --git a/patches.suse/block-cache-rq_flags-inside-blk_mq_rq_ctx_init.patch b/patches.suse/block-cache-rq_flags-inside-blk_mq_rq_ctx_init.patch new file mode 100644 index 0000000..e8c9a7c --- /dev/null +++ 
b/patches.suse/block-cache-rq_flags-inside-blk_mq_rq_ctx_init.patch @@ -0,0 +1,61 @@ +From: Pavel Begunkov +Date: Mon, 18 Oct 2021 21:37:29 +0100 +Subject: [PATCH] block: cache rq_flags inside blk_mq_rq_ctx_init() +Git-commit: 128459062bc994355027e190477c432ec5b5638a +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Add a local variable for rq_flags, it helps to compile out some of +rq_flags reloads. + +Signed-off-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index fa4de25c3bcb..633d73580712 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -318,17 +318,23 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, + struct elevator_queue *e = q->elevator; + struct blk_mq_tags *tags = blk_mq_tags_from_data(data); + struct request *rq = tags->static_rqs[tag]; ++ unsigned int rq_flags = 0; + + if (e) { +- rq->rq_flags = RQF_ELV; ++ rq_flags = RQF_ELV; + rq->tag = BLK_MQ_NO_TAG; + rq->internal_tag = tag; + } else { +- rq->rq_flags = 0; + rq->tag = tag; + rq->internal_tag = BLK_MQ_NO_TAG; + } + ++ if (data->flags & BLK_MQ_REQ_PM) ++ rq_flags |= RQF_PM; ++ if (blk_queue_io_stat(q)) ++ rq_flags |= RQF_IO_STAT; ++ rq->rq_flags = rq_flags; ++ + if (blk_mq_need_time_stamp(rq)) + rq->start_time_ns = ktime_get_ns(); + else +@@ -338,10 +344,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, + rq->mq_ctx = ctx; + rq->mq_hctx = hctx; + rq->cmd_flags = data->cmd_flags; +- if (data->flags & BLK_MQ_REQ_PM) +- rq->rq_flags |= RQF_PM; +- if (blk_queue_io_stat(q)) +- rq->rq_flags |= RQF_IO_STAT; + rq->rq_disk = NULL; + rq->part = NULL; + #ifdef CONFIG_BLK_RQ_ALLOC_TIME +-- +2.35.3 + diff --git a/patches.suse/block-change-plugging-to-use-a-singly-linked-list.patch b/patches.suse/block-change-plugging-to-use-a-singly-linked-list.patch new file mode 100644 index 0000000..818075f --- /dev/null +++ b/patches.suse/block-change-plugging-to-use-a-singly-linked-list.patch @@ -0,0 +1,217 @@ +From: Jens Axboe +Date: Mon, 18 Oct 2021 10:12:12 -0600 +Subject: [PATCH] block: change plugging to use a singly linked list +Git-commit: bc490f81731e181b07b8d7577425c06ae91692c8 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use a singly linked list for the blk_plug. This saves 8 bytes in the +blk_plug struct, and makes for faster list manipulations than doubly +linked lists. As we don't use the doubly linked lists for anything, +singly linked is just fine. + +This yields a bump in default (merging enabled) performance from 7.0 +to 7.1M IOPS, and ~7.5M IOPS with merging disabled. 
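+
+The rq_list_add() used here is the macro added to blkdev.h earlier in
+this series; a standalone model of the resulting list discipline (peek
+and pop are written out as illustrative helpers) shows why a bare head
+pointer suffices:
+
+  #include <stdio.h>
+  #include <stddef.h>
+
+  struct request { int tag; struct request *rq_next; };
+
+  /* LIFO push onto a bare head pointer, as in the kernel macro. */
+  #define rq_list_add(listptr, rq) do {   \
+          (rq)->rq_next = *(listptr);     \
+          *(listptr) = (rq);              \
+  } while (0)
+
+  static struct request *rq_list_peek(struct request **listptr)
+  {
+      return *listptr;
+  }
+
+  static struct request *rq_list_pop(struct request **listptr)
+  {
+      struct request *rq = *listptr;
+
+      if (rq)
+          *listptr = rq->rq_next;
+      return rq;
+  }
+
+  int main(void)
+  {
+      struct request a = { 1, NULL }, b = { 2, NULL };
+      struct request *mq_list = NULL; /* 8 bytes vs 16 for a list_head */
+      struct request *rq;
+
+      rq_list_add(&mq_list, &a);
+      rq_list_add(&mq_list, &b);
+      printf("peek: %d\n", rq_list_peek(&mq_list)->tag);
+      while ((rq = rq_list_pop(&mq_list)))
+          printf("pop: %d\n", rq->tag);
+      return 0;
+  }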
+ +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-core.c | 4 +-- + block/blk-merge.c | 4 +-- + block/blk-mq.c | 80 ++++++++++++++++++++++++------------------ + include/linux/blkdev.h | 5 ++- + 4 files changed, 51 insertions(+), 42 deletions(-) + +diff --git a/block/blk-core.c b/block/blk-core.c +index d0c2e11411d0..14d20909f61a 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -1550,7 +1550,7 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios) + if (tsk->plug) + return; + +- INIT_LIST_HEAD(&plug->mq_list); ++ plug->mq_list = NULL; + plug->cached_rq = NULL; + plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT); + plug->rq_count = 0; +@@ -1640,7 +1640,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) + { + flush_plug_callbacks(plug, from_schedule); + +- if (!list_empty(&plug->mq_list)) ++ if (!rq_list_empty(plug->mq_list)) + blk_mq_flush_plug_list(plug, from_schedule); + if (unlikely(!from_schedule && plug->cached_rq)) + blk_mq_free_plug_rqs(plug); +diff --git a/block/blk-merge.c b/block/blk-merge.c +index c273b58378ce..3e6fa449caff 100644 +--- a/block/blk-merge.c ++++ b/block/blk-merge.c +@@ -1090,11 +1090,11 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, + struct request *rq; + + plug = blk_mq_plug(q, bio); +- if (!plug || list_empty(&plug->mq_list)) ++ if (!plug || rq_list_empty(plug->mq_list)) + return false; + + /* check the previously added entry for a quick merge attempt */ +- rq = list_last_entry(&plug->mq_list, struct request, queuelist); ++ rq = rq_list_peek(&plug->mq_list); + if (rq->q == q) { + /* + * Only blk-mq multiple hardware queues case checks the rq in +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 8f5c1662335b..7fa302730d4a 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2151,34 +2151,46 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, + + void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) + { ++ struct blk_mq_hw_ctx *this_hctx; ++ struct blk_mq_ctx *this_ctx; ++ unsigned int depth; + LIST_HEAD(list); + +- if (list_empty(&plug->mq_list)) ++ if (rq_list_empty(plug->mq_list)) + return; +- list_splice_init(&plug->mq_list, &list); + plug->rq_count = 0; + ++ this_hctx = NULL; ++ this_ctx = NULL; ++ depth = 0; + do { +- struct list_head rq_list; +- struct request *rq, *head_rq = list_entry_rq(list.next); +- struct list_head *pos = &head_rq->queuelist; /* skip first */ +- struct blk_mq_hw_ctx *this_hctx = head_rq->mq_hctx; +- struct blk_mq_ctx *this_ctx = head_rq->mq_ctx; +- unsigned int depth = 1; +- +- list_for_each_continue(pos, &list) { +- rq = list_entry_rq(pos); +- BUG_ON(!rq->q); +- if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx) +- break; +- depth++; ++ struct request *rq; ++ ++ rq = rq_list_pop(&plug->mq_list); ++ ++ if (!this_hctx) { ++ this_hctx = rq->mq_hctx; ++ this_ctx = rq->mq_ctx; ++ } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) { ++ trace_block_unplug(this_hctx->queue, depth, ++ !from_schedule); ++ blk_mq_sched_insert_requests(this_hctx, this_ctx, ++ &list, from_schedule); ++ depth = 0; ++ this_hctx = rq->mq_hctx; ++ this_ctx = rq->mq_ctx; ++ + } + +- list_cut_before(&rq_list, &list, pos); +- trace_block_unplug(head_rq->q, depth, !from_schedule); +- blk_mq_sched_insert_requests(this_hctx, this_ctx, &rq_list, ++ list_add(&rq->queuelist, &list); ++ depth++; ++ } while (!rq_list_empty(plug->mq_list)); ++ ++ if 
(!list_empty(&list)) { ++ trace_block_unplug(this_hctx->queue, depth, !from_schedule); ++ blk_mq_sched_insert_requests(this_hctx, this_ctx, &list, + from_schedule); +- } while(!list_empty(&list)); ++ } + } + + static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, +@@ -2358,16 +2370,15 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, + + static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) + { +- list_add_tail(&rq->queuelist, &plug->mq_list); +- plug->rq_count++; +- if (!plug->multiple_queues && !list_is_singular(&plug->mq_list)) { +- struct request *tmp; ++ if (!plug->multiple_queues) { ++ struct request *nxt = rq_list_peek(&plug->mq_list); + +- tmp = list_first_entry(&plug->mq_list, struct request, +- queuelist); +- if (tmp->q != rq->q) ++ if (nxt && nxt->q != rq->q) + plug->multiple_queues = true; + } ++ rq->rq_next = NULL; ++ rq_list_add(&plug->mq_list, rq); ++ plug->rq_count++; + } + + /* +@@ -2479,13 +2490,15 @@ void blk_mq_submit_bio(struct bio *bio) + unsigned int request_count = plug->rq_count; + struct request *last = NULL; + +- if (!request_count) ++ if (!request_count) { + trace_block_plug(q); +- else +- last = list_entry_rq(plug->mq_list.prev); ++ } else if (!blk_queue_nomerges(q)) { ++ last = rq_list_peek(&plug->mq_list); ++ if (blk_rq_bytes(last) < BLK_PLUG_FLUSH_SIZE) ++ last = NULL; ++ } + +- if (request_count >= blk_plug_max_rq_count(plug) || (last && +- blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) { ++ if (request_count >= blk_plug_max_rq_count(plug) || last) { + blk_flush_plug_list(plug, false); + trace_block_plug(q); + } +@@ -2505,10 +2518,7 @@ void blk_mq_submit_bio(struct bio *bio) + * the plug list is empty, and same_queue_rq is invalid. + */ + if (same_queue_rq) { +- next_rq = list_last_entry(&plug->mq_list, +- struct request, +- queuelist); +- list_del_init(&next_rq->queuelist); ++ next_rq = rq_list_pop(&plug->mq_list); + plug->rq_count--; + } + blk_add_rq_to_plug(plug, rq); +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index fd9771a1da09..4027112b9851 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -728,7 +728,7 @@ extern void blk_set_queue_dying(struct request_queue *); + * schedule() where blk_schedule_flush_plug() is called. + */ + struct blk_plug { +- struct list_head mq_list; /* blk-mq requests */ ++ struct request *mq_list; /* blk-mq requests */ + + /* if ios_left is > 1, we can batch tag/rq allocations */ + struct request *cached_rq; +@@ -777,8 +777,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) + struct blk_plug *plug = tsk->plug; + + return plug && +- (!list_empty(&plug->mq_list) || +- !list_empty(&plug->cb_list)); ++ (plug->mq_list || !list_empty(&plug->cb_list)); + } + + int blkdev_issue_flush(struct block_device *bdev); +-- +2.35.3 + diff --git a/patches.suse/block-clean-up-blk_mq_submit_bio-merging.patch b/patches.suse/block-clean-up-blk_mq_submit_bio-merging.patch new file mode 100644 index 0000000..e9fe4d9 --- /dev/null +++ b/patches.suse/block-clean-up-blk_mq_submit_bio-merging.patch @@ -0,0 +1,107 @@ +From: Pavel Begunkov +Date: Wed, 20 Oct 2021 20:00:49 +0100 +Subject: [PATCH] block: clean up blk_mq_submit_bio() merging +Git-commit: 179ae84f7ef5225e03cd29cb9a75f6e90d2d7388 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Combine blk_mq_sched_bio_merge() and blk_attempt_plug_merge() under a +common if, so we don't check it twice. 
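+
+As a control-flow illustration only (try_plug_merge() and
+try_sched_merge() are hypothetical stand-ins, not the kernel helpers),
+the hoisted condition looks like this:
+
+  #include <stdio.h>
+  #include <stdbool.h>
+
+  static bool nomerges;
+  static bool mergeable(int bio)       { return bio % 2 == 0; }
+  static bool try_plug_merge(int bio)  { return bio == 2; }
+  static bool try_sched_merge(int bio) { return bio == 4; }
+
+  /* The shared preconditions are evaluated once for both attempts. */
+  static const char *submit(int bio)
+  {
+      if (!nomerges && mergeable(bio)) {
+          if (try_plug_merge(bio))
+              return "merged into plug";
+          if (try_sched_merge(bio))
+              return "merged by scheduler";
+      }
+      return "new request";
+  }
+
+  int main(void)
+  {
+      for (int bio = 1; bio <= 4; bio++)
+          printf("bio %d: %s\n", bio, submit(bio));
+      return 0;
+  }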
+ +Signed-off-by: Pavel Begunkov +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/daedc90d4029a5d1d73344771632b1faca3aaf81.1634755800.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq-sched.c | 2 +- + block/blk-mq-sched.h | 12 +----------- + block/blk-mq.c | 15 +++++++-------- + 3 files changed, 9 insertions(+), 20 deletions(-) + +diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c +index e85b7556b096..5b259fdea794 100644 +--- a/block/blk-mq-sched.c ++++ b/block/blk-mq-sched.c +@@ -361,7 +361,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) + } + } + +-bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, ++bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs) + { + struct elevator_queue *e = q->elevator; +diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h +index 98836106b25f..25d1034952b6 100644 +--- a/block/blk-mq-sched.h ++++ b/block/blk-mq-sched.h +@@ -12,7 +12,7 @@ void blk_mq_sched_assign_ioc(struct request *rq); + + bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs, struct request **merged_request); +-bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, ++bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs); + bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq, + struct list_head *free); +@@ -42,16 +42,6 @@ static inline bool bio_mergeable(struct bio *bio) + return !(bio->bi_opf & REQ_NOMERGE_FLAGS); + } + +-static inline bool +-blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, +- unsigned int nr_segs) +-{ +- if (blk_queue_nomerges(q) || !bio_mergeable(bio)) +- return false; +- +- return __blk_mq_sched_bio_merge(q, bio, nr_segs); +-} +- + static inline bool + blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, + struct bio *bio) +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 101466ece4c4..d04ee72ba125 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2481,7 +2481,6 @@ void blk_mq_submit_bio(struct bio *bio) + { + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + const int is_sync = op_is_sync(bio->bi_opf); +- const int is_flush_fua = op_is_flush(bio->bi_opf); + struct request *rq; + struct blk_plug *plug; + bool same_queue_rq = false; +@@ -2495,12 +2494,12 @@ void blk_mq_submit_bio(struct bio *bio) + if (!bio_integrity_prep(bio)) + goto queue_exit; + +- if (!is_flush_fua && !blk_queue_nomerges(q) && +- blk_attempt_plug_merge(q, bio, nr_segs, &same_queue_rq)) +- goto queue_exit; +- +- if (blk_mq_sched_bio_merge(q, bio, nr_segs)) +- goto queue_exit; ++ if (!blk_queue_nomerges(q) && bio_mergeable(bio)) { ++ if (blk_attempt_plug_merge(q, bio, nr_segs, &same_queue_rq)) ++ goto queue_exit; ++ if (blk_mq_sched_bio_merge(q, bio, nr_segs)) ++ goto queue_exit; ++ } + + rq_qos_throttle(q, bio); + +@@ -2543,7 +2542,7 @@ void blk_mq_submit_bio(struct bio *bio) + return; + } + +- if (is_flush_fua && blk_insert_flush(rq)) ++ if (op_is_flush(bio->bi_opf) && blk_insert_flush(rq)) + return; + + if (plug && (q->nr_hw_queues == 1 || +-- +2.35.3 + diff --git a/patches.suse/block-cleanup-the-flush-plug-helpers.patch b/patches.suse/block-cleanup-the-flush-plug-helpers.patch new file mode 100644 index 0000000..ec66d4e --- /dev/null +++ b/patches.suse/block-cleanup-the-flush-plug-helpers.patch @@ -0,0 +1,168 @@ +From: Christoph Hellwig +Date: Wed, 20 Oct 2021 16:41:19 +0200 
+Subject: [PATCH] block: cleanup the flush plug helpers
+Git-commit: 008f75a20e7072d0840ec323c39b42206f3fa8a0
+Patch-mainline: v5.16-rc1
+References: jsc#PED-1183
+
+Consolidate the various helpers into a single blk_flush_plug helper that
+takes a blk_plug and the from_schedule bool and switch all callsites to
+call it directly. Checks that the plug is non-NULL must be performed by
+the caller, something that most already do anyway.
+
+Signed-off-by: Christoph Hellwig
+Link: https://lore.kernel.org/r/20211020144119.142582-5-hch@lst.de
+Signed-off-by: Jens Axboe
+Acked-by: Hannes Reinecke
+---
+ block/blk-core.c | 13 ++++++-------
+ fs/fs-writeback.c | 5 +++--
+ include/linux/blkdev.h | 29 ++++-------------------------
+ kernel/sched/core.c | 5 +++--
+ 4 files changed, 16 insertions(+), 36 deletions(-)
+
+diff --git a/block/blk-core.c b/block/blk-core.c
+index db8b2fe0ceaf..dfa199312c2f 100644
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -1089,7 +1089,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
+ return 0;
+
+ if (current->plug)
+- blk_flush_plug_list(current->plug, false);
++ blk_flush_plug(current->plug, false);
+
+ if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
+ return 0;
+@@ -1637,7 +1637,7 @@ struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
+ }
+ EXPORT_SYMBOL(blk_check_plugged);
+
+-void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
++void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
+ {
+ if (!list_empty(&plug->cb_list))
+ flush_plug_callbacks(plug, from_schedule);
+@@ -1659,11 +1659,10 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
+ */
+ void blk_finish_plug(struct blk_plug *plug)
+ {
+- if (plug != current->plug)
+- return;
+- blk_flush_plug_list(plug, false);
+-
+- current->plug = NULL;
++ if (plug == current->plug) {
++ blk_flush_plug(plug, false);
++ current->plug = NULL;
++ }
+ }
+ EXPORT_SYMBOL(blk_finish_plug);
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 81ec192ce067..4124a89a1a5d 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -1893,7 +1893,8 @@ static long writeback_sb_inodes(struct super_block *sb,
+ * unplug, so get our IOs out the door before we
+ * give up the CPU.
+ */
+- blk_flush_plug(current);
++ if (current->plug)
++ blk_flush_plug(current->plug, false);
+ cond_resched();
+ }
+
+@@ -2291,7 +2292,7 @@ void wakeup_flusher_threads(enum wb_reason reason)
+ * If we are expecting writeback progress we must submit plugged IO.
+ */
+ if (blk_needs_flush_plug(current))
+- blk_schedule_flush_plug(current);
++ blk_flush_plug(current->plug, true);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
+diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
+index 2b22fa36e568..c7b1e9355123 100644
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -725,9 +725,8 @@ extern void blk_set_queue_dying(struct request_queue *);
+ * as the lock contention for request_queue lock is reduced.
+ *
+ * It is ok not to disable preemption when adding the request to the plug list
+- * or when attempting a merge, because blk_schedule_flush_list() will only flush
+- * the plug list when the task sleeps by itself. For details, please see
+- * schedule() where blk_schedule_flush_plug() is called.
++ * or when attempting a merge. For details, please see schedule() where
++ * blk_flush_plug() is called.
+ */ + struct blk_plug { + struct request *mq_list; /* blk-mq requests */ +@@ -757,23 +756,8 @@ extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, + extern void blk_start_plug(struct blk_plug *); + extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short); + extern void blk_finish_plug(struct blk_plug *); +-extern void blk_flush_plug_list(struct blk_plug *, bool); + +-static inline void blk_flush_plug(struct task_struct *tsk) +-{ +- struct blk_plug *plug = tsk->plug; +- +- if (plug) +- blk_flush_plug_list(plug, false); +-} +- +-static inline void blk_schedule_flush_plug(struct task_struct *tsk) +-{ +- struct blk_plug *plug = tsk->plug; +- +- if (plug) +- blk_flush_plug_list(plug, true); +-} ++void blk_flush_plug(struct blk_plug *plug, bool from_schedule); + + static inline bool blk_needs_flush_plug(struct task_struct *tsk) + { +@@ -802,15 +786,10 @@ static inline void blk_finish_plug(struct blk_plug *plug) + { + } + +-static inline void blk_flush_plug(struct task_struct *task) +-{ +-} +- +-static inline void blk_schedule_flush_plug(struct task_struct *task) ++static inline void blk_flush_plug(struct blk_plug *plug, bool async) + { + } + +- + static inline bool blk_needs_flush_plug(struct task_struct *tsk) + { + return false; +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 92ef7b68198c..34f37502c27e 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -6343,7 +6343,7 @@ static inline void sched_submit_work(struct task_struct *tsk) + * make sure to submit it to avoid deadlocks. + */ + if (blk_needs_flush_plug(tsk)) +- blk_schedule_flush_plug(tsk); ++ blk_flush_plug(tsk->plug, true); + } + + static void sched_update_worker(struct task_struct *tsk) +@@ -8354,7 +8354,8 @@ int io_schedule_prepare(void) + int old_iowait = current->in_iowait; + + current->in_iowait = 1; +- blk_schedule_flush_plug(current); ++ if (current->plug) ++ blk_flush_plug(current->plug, true); + + return old_iowait; + } +-- +2.35.3 + diff --git a/patches.suse/block-convert-fops.c-magic-constants-to-SHIFT_SECTOR.patch b/patches.suse/block-convert-fops.c-magic-constants-to-SHIFT_SECTOR.patch new file mode 100644 index 0000000..8a6362e --- /dev/null +++ b/patches.suse/block-convert-fops.c-magic-constants-to-SHIFT_SECTOR.patch @@ -0,0 +1,69 @@ +From: Pavel Begunkov +Date: Wed, 20 Oct 2021 20:00:50 +0100 +Subject: [PATCH] block: convert fops.c magic constants to SHIFT_SECTOR +Git-commit: 6549a874fb65e7ad4885d066ec314191cc137b52 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Don't use shifting by a magic number 9 but replace with a more +descriptive SHIFT_SECTOR. 
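+
+A standalone sketch of the readability argument (the macro the code
+uses is SECTOR_SHIFT, value 9 for 512-byte sectors, as in the kernel):
+
+  #include <stdio.h>
+
+  #define SECTOR_SHIFT 9  /* 512-byte sectors */
+
+  int main(void)
+  {
+      unsigned long long pos = 4096;
+
+      printf("magic: sector %llu\n", pos >> 9);
+      printf("named: sector %llu\n", pos >> SECTOR_SHIFT);
+      return 0;
+  }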
+ +Signed-off-by: Pavel Begunkov +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/068782b9f7e97569fb59a99529b23bb17ea4c5e2.1634755800.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/fops.c | 18 ++++++++++-------- + 1 file changed, 10 insertions(+), 8 deletions(-) + +diff --git a/block/fops.c b/block/fops.c +index 8f733c919ed1..396537598e3e 100644 +--- a/block/fops.c ++++ b/block/fops.c +@@ -76,7 +76,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, + + bio_init(&bio, vecs, nr_pages); + bio_set_dev(&bio, bdev); +- bio.bi_iter.bi_sector = pos >> 9; ++ bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT; + bio.bi_write_hint = iocb->ki_hint; + bio.bi_private = current; + bio.bi_end_io = blkdev_bio_end_io_simple; +@@ -225,7 +225,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + + for (;;) { + bio_set_dev(bio, bdev); +- bio->bi_iter.bi_sector = pos >> 9; ++ bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT; + bio->bi_write_hint = iocb->ki_hint; + bio->bi_private = dio; + bio->bi_end_io = blkdev_bio_end_io; +@@ -565,16 +565,18 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, + switch (mode) { + case FALLOC_FL_ZERO_RANGE: + case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE: +- error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, +- GFP_KERNEL, BLKDEV_ZERO_NOUNMAP); ++ error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT, ++ len >> SECTOR_SHIFT, GFP_KERNEL, ++ BLKDEV_ZERO_NOUNMAP); + break; + case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE: +- error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, +- GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK); ++ error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT, ++ len >> SECTOR_SHIFT, GFP_KERNEL, ++ BLKDEV_ZERO_NOFALLBACK); + break; + case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE: +- error = blkdev_issue_discard(bdev, start >> 9, len >> 9, +- GFP_KERNEL, 0); ++ error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT, ++ len >> SECTOR_SHIFT, GFP_KERNEL, 0); + break; + default: + error = -EOPNOTSUPP; +-- +2.35.3 + diff --git a/patches.suse/block-convert-leftovers-to-bdev_get_queue.patch b/patches.suse/block-convert-leftovers-to-bdev_get_queue.patch new file mode 100644 index 0000000..9970e74 --- /dev/null +++ b/patches.suse/block-convert-leftovers-to-bdev_get_queue.patch @@ -0,0 +1,49 @@ +From: Pavel Begunkov +Date: Tue, 19 Oct 2021 22:24:11 +0100 +Subject: [PATCH] block: convert leftovers to bdev_get_queue +Git-commit: 859897c3fb9ad12fef429b026154e11350d0ceac +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Convert bdev->bd_disk->queue to bdev_get_queue(), which is faster. +Apparently, there are a few such spots in block that got lost during +rebases. 
+ +Reviewed-by: Chaitanya Kulkarni +Signed-off-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-core.c | 2 +- + block/blk-merge.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/blk-core.c b/block/blk-core.c +index e6ad5b51d0c3..c1ba34777c6d 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -1080,7 +1080,7 @@ EXPORT_SYMBOL(submit_bio); + */ + int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) + { +- struct request_queue *q = bio->bi_bdev->bd_disk->queue; ++ struct request_queue *q = bdev_get_queue(bio->bi_bdev); + blk_qc_t cookie = READ_ONCE(bio->bi_cookie); + int ret; + +diff --git a/block/blk-merge.c b/block/blk-merge.c +index 3e6fa449caff..df69f4bb7717 100644 +--- a/block/blk-merge.c ++++ b/block/blk-merge.c +@@ -383,7 +383,7 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio, + */ + void blk_queue_split(struct bio **bio) + { +- struct request_queue *q = (*bio)->bi_bdev->bd_disk->queue; ++ struct request_queue *q = bdev_get_queue((*bio)->bi_bdev); + unsigned int nr_segs; + + if (blk_may_split(q, *bio)) +-- +2.35.3 + diff --git a/patches.suse/block-convert-the-rest-of-block-to-bdev_get_queue.patch b/patches.suse/block-convert-the-rest-of-block-to-bdev_get_queue.patch new file mode 100644 index 0000000..5a56c0a --- /dev/null +++ b/patches.suse/block-convert-the-rest-of-block-to-bdev_get_queue.patch @@ -0,0 +1,223 @@ +From: Pavel Begunkov +Date: Thu, 14 Oct 2021 15:03:30 +0100 +Subject: [PATCH] block: convert the rest of block to bdev_get_queue +Git-commit: ed6cddefdfd361af23a25bdeaaa5e345ac714c38 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Convert bdev->bd_disk->queue to bdev_get_queue(), which uses a cached +queue pointer and so is faster.
+ +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/addf6ea988c04213697ba3684c853e4ed7642a39.1634219547.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/bio-integrity.c | 2 +- + block/blk-cgroup.c | 16 ++++++++-------- + block/blk-crypto.c | 2 +- + block/blk-iocost.c | 12 ++++++------ + block/blk-mq.c | 2 +- + block/blk-throttle.c | 2 +- + block/genhd.c | 4 ++-- + block/partitions/core.c | 4 ++-- + 8 files changed, 22 insertions(+), 22 deletions(-) + +diff --git a/block/bio-integrity.c b/block/bio-integrity.c +index 21234ff966d9..d25114715459 100644 +--- a/block/bio-integrity.c ++++ b/block/bio-integrity.c +@@ -134,7 +134,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, + iv = bip->bip_vec + bip->bip_vcnt; + + if (bip->bip_vcnt && +- bvec_gap_to_prev(bio->bi_bdev->bd_disk->queue, ++ bvec_gap_to_prev(bdev_get_queue(bio->bi_bdev), + &bip->bip_vec[bip->bip_vcnt - 1], offset)) + return 0; + +diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c +index 166104d92b5e..8908298d6ad3 100644 +--- a/block/blk-cgroup.c ++++ b/block/blk-cgroup.c +@@ -621,7 +621,7 @@ struct block_device *blkcg_conf_open_bdev(char **inputp) + */ + int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, + char *input, struct blkg_conf_ctx *ctx) +- __acquires(rcu) __acquires(&bdev->bd_disk->queue->queue_lock) ++ __acquires(rcu) __acquires(&bdev->bd_queue->queue_lock) + { + struct block_device *bdev; + struct request_queue *q; +@@ -632,7 +632,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + +- q = bdev->bd_disk->queue; ++ q = bdev_get_queue(bdev); + + rcu_read_lock(); + spin_lock_irq(&q->queue_lock); +@@ -737,9 +737,9 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep); + * with blkg_conf_prep(). 
+ */ + void blkg_conf_finish(struct blkg_conf_ctx *ctx) +- __releases(&ctx->bdev->bd_disk->queue->queue_lock) __releases(rcu) ++ __releases(&ctx->bdev->bd_queue->queue_lock) __releases(rcu) + { +- spin_unlock_irq(&ctx->bdev->bd_disk->queue->queue_lock); ++ spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock); + rcu_read_unlock(); + blkdev_put_no_open(ctx->bdev); + } +@@ -842,7 +842,7 @@ static void blkcg_fill_root_iostats(void) + while ((dev = class_dev_iter_next(&iter))) { + struct block_device *bdev = dev_to_bdev(dev); + struct blkcg_gq *blkg = +- blk_queue_root_blkg(bdev->bd_disk->queue); ++ blk_queue_root_blkg(bdev_get_queue(bdev)); + struct blkg_iostat tmp; + int cpu; + +@@ -1801,7 +1801,7 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio, + + rcu_read_lock(); + blkg = blkg_lookup_create(css_to_blkcg(css), +- bio->bi_bdev->bd_disk->queue); ++ bdev_get_queue(bio->bi_bdev)); + while (blkg) { + if (blkg_tryget(blkg)) { + ret_blkg = blkg; +@@ -1837,8 +1837,8 @@ void bio_associate_blkg_from_css(struct bio *bio, + if (css && css->parent) { + bio->bi_blkg = blkg_tryget_closest(bio, css); + } else { +- blkg_get(bio->bi_bdev->bd_disk->queue->root_blkg); +- bio->bi_blkg = bio->bi_bdev->bd_disk->queue->root_blkg; ++ blkg_get(bdev_get_queue(bio->bi_bdev)->root_blkg); ++ bio->bi_blkg = bdev_get_queue(bio->bi_bdev)->root_blkg; + } + } + EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); +diff --git a/block/blk-crypto.c b/block/blk-crypto.c +index 103c2e2d50d6..8f53f4a1f9e2 100644 +--- a/block/blk-crypto.c ++++ b/block/blk-crypto.c +@@ -280,7 +280,7 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr) + * Success if device supports the encryption context, or if we succeeded + * in falling back to the crypto API. + */ +- if (blk_ksm_crypto_cfg_supported(bio->bi_bdev->bd_disk->queue->ksm, ++ if (blk_ksm_crypto_cfg_supported(bdev_get_queue(bio->bi_bdev)->ksm, + &bc_key->crypto_cfg)) + return true; + +diff --git a/block/blk-iocost.c b/block/blk-iocost.c +index b3880e4ba22a..a5b37cc65b17 100644 +--- a/block/blk-iocost.c ++++ b/block/blk-iocost.c +@@ -3165,12 +3165,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + +- ioc = q_to_ioc(bdev->bd_disk->queue); ++ ioc = q_to_ioc(bdev_get_queue(bdev)); + if (!ioc) { +- ret = blk_iocost_init(bdev->bd_disk->queue); ++ ret = blk_iocost_init(bdev_get_queue(bdev)); + if (ret) + goto err; +- ioc = q_to_ioc(bdev->bd_disk->queue); ++ ioc = q_to_ioc(bdev_get_queue(bdev)); + } + + spin_lock_irq(&ioc->lock); +@@ -3332,12 +3332,12 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + +- ioc = q_to_ioc(bdev->bd_disk->queue); ++ ioc = q_to_ioc(bdev_get_queue(bdev)); + if (!ioc) { +- ret = blk_iocost_init(bdev->bd_disk->queue); ++ ret = blk_iocost_init(bdev_get_queue(bdev)); + if (ret) + goto err; +- ioc = q_to_ioc(bdev->bd_disk->queue); ++ ioc = q_to_ioc(bdev_get_queue(bdev)); + } + + spin_lock_irq(&ioc->lock); +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 09219080855f..b58878221f17 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2253,7 +2253,7 @@ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) + */ + void blk_mq_submit_bio(struct bio *bio) + { +- struct request_queue *q = bio->bi_bdev->bd_disk->queue; ++ struct request_queue *q = bdev_get_queue(bio->bi_bdev); + const int is_sync = op_is_sync(bio->bi_opf); + const int is_flush_fua = op_is_flush(bio->bi_opf); + struct request 
*rq; +diff --git a/block/blk-throttle.c b/block/blk-throttle.c +index 8cefd14deed5..39bb6e68a9a2 100644 +--- a/block/blk-throttle.c ++++ b/block/blk-throttle.c +@@ -2063,7 +2063,7 @@ void blk_throtl_charge_bio_split(struct bio *bio) + + bool __blk_throtl_bio(struct bio *bio) + { +- struct request_queue *q = bio->bi_bdev->bd_disk->queue; ++ struct request_queue *q = bdev_get_queue(bio->bi_bdev); + struct blkcg_gq *blkg = bio->bi_blkg; + struct throtl_qnode *qn = NULL; + struct throtl_grp *tg = blkg_to_tg(blkg); +diff --git a/block/genhd.c b/block/genhd.c +index d148c38450d7..759bc06810f8 100644 +--- a/block/genhd.c ++++ b/block/genhd.c +@@ -883,7 +883,7 @@ ssize_t part_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) + { + struct block_device *bdev = dev_to_bdev(dev); +- struct request_queue *q = bdev->bd_disk->queue; ++ struct request_queue *q = bdev_get_queue(bdev); + struct disk_stats stat; + unsigned int inflight; + +@@ -927,7 +927,7 @@ ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, + char *buf) + { + struct block_device *bdev = dev_to_bdev(dev); +- struct request_queue *q = bdev->bd_disk->queue; ++ struct request_queue *q = bdev_get_queue(bdev); + unsigned int inflight[2]; + + if (queue_is_mq(q)) +diff --git a/block/partitions/core.c b/block/partitions/core.c +index 3a4898433c43..9dbddc355b40 100644 +--- a/block/partitions/core.c ++++ b/block/partitions/core.c +@@ -204,7 +204,7 @@ static ssize_t part_alignment_offset_show(struct device *dev, + struct block_device *bdev = dev_to_bdev(dev); + + return sprintf(buf, "%u\n", +- queue_limit_alignment_offset(&bdev->bd_disk->queue->limits, ++ queue_limit_alignment_offset(&bdev_get_queue(bdev)->limits, + bdev->bd_start_sect)); + } + +@@ -214,7 +214,7 @@ static ssize_t part_discard_alignment_show(struct device *dev, + struct block_device *bdev = dev_to_bdev(dev); + + return sprintf(buf, "%u\n", +- queue_limit_discard_alignment(&bdev->bd_disk->queue->limits, ++ queue_limit_discard_alignment(&bdev_get_queue(bdev)->limits, + bdev->bd_start_sect)); + } + +-- +2.35.3 + diff --git a/patches.suse/block-define-struct-bvec_iter-as-packed.patch b/patches.suse/block-define-struct-bvec_iter-as-packed.patch new file mode 100644 index 0000000..b74732c --- /dev/null +++ b/patches.suse/block-define-struct-bvec_iter-as-packed.patch @@ -0,0 +1,44 @@ +From: Ming Lei +Date: Tue, 12 Oct 2021 13:12:23 +0200 +Subject: [PATCH] block: define 'struct bvec_iter' as packed +Git-commit: 19416123ab3e1348b3532347af221d8f60838431 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +'struct bvec_iter' is embedded into 'struct bio'; define it as packed +so that we gain an extra 4 bytes for other uses without expanding +the bio. + +'struct bvec_iter' is often allocated on the stack, so making it packed +doesn't affect performance. I have also run io_uring on both +nvme and null_blk, and did not observe any performance effect.
+ +Suggested-by: Christoph Hellwig +Signed-off-by: Ming Lei +Reviewed-by: Sagi Grimberg +Reviewed-by: Hannes Reinecke +Signed-off-by: Christoph Hellwig +Tested-by: Mark Wunderlich +Link: https://lore.kernel.org/r/20211012111226.760968-14-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + include/linux/bvec.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/linux/bvec.h b/include/linux/bvec.h +index 0e9bdd42dafb..35c25dff651a 100644 +--- a/include/linux/bvec.h ++++ b/include/linux/bvec.h +@@ -44,7 +44,7 @@ struct bvec_iter { + + unsigned int bi_bvec_done; /* number of bytes completed in + current bvec */ +-}; ++} __packed; + + struct bvec_iter_all { + struct bio_vec bv; +-- +2.35.3 + diff --git a/patches.suse/block-don-t-allow-writing-to-the-poll-queue-attribut.patch b/patches.suse/block-don-t-allow-writing-to-the-poll-queue-attribut.patch new file mode 100644 index 0000000..17df3dc --- /dev/null +++ b/patches.suse/block-don-t-allow-writing-to-the-poll-queue-attribut.patch @@ -0,0 +1,59 @@ +From: Christoph Hellwig +Date: Tue, 12 Oct 2021 13:12:25 +0200 +Subject: [PATCH] block: don't allow writing to the poll queue attribute +Git-commit: a614dd2280356df0c79300c49d82b7e0c8b31f24 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +The poll attribute is a historic artefact from before when we had +explicit poll queues that require driver specific configuration. +Just print a warning when writing to the attribute. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Sagi Grimberg +Tested-by: Mark Wunderlich +Link: https://lore.kernel.org/r/20211012111226.760968-16-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-sysfs.c | 23 ++++------------------- + 1 file changed, 4 insertions(+), 19 deletions(-) + +diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c +index da883efcba33..36f14d658e81 100644 +--- a/block/blk-sysfs.c ++++ b/block/blk-sysfs.c +@@ -433,26 +433,11 @@ static ssize_t queue_poll_show(struct request_queue *q, char *page) + static ssize_t queue_poll_store(struct request_queue *q, const char *page, + size_t count) + { +- unsigned long poll_on; +- ssize_t ret; +- +- if (!q->tag_set || q->tag_set->nr_maps <= HCTX_TYPE_POLL || +- !q->tag_set->map[HCTX_TYPE_POLL].nr_queues) ++ if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) + return -EINVAL; +- +- ret = queue_var_store(&poll_on, page, count); +- if (ret < 0) +- return ret; +- +- if (poll_on) { +- blk_queue_flag_set(QUEUE_FLAG_POLL, q); +- } else { +- blk_mq_freeze_queue(q); +- blk_queue_flag_clear(QUEUE_FLAG_POLL, q); +- blk_mq_unfreeze_queue(q); +- } +- +- return ret; ++ pr_info_ratelimited("writes to the poll attribute are ignored.\n"); ++ pr_info_ratelimited("please use driver specific parameters instead.\n"); ++ return count; + } + + static ssize_t queue_io_timeout_show(struct request_queue *q, char *page) +-- +2.35.3 + diff --git a/patches.suse/block-don-t-bloat-enter_queue-with-percpu_ref.patch b/patches.suse/block-don-t-bloat-enter_queue-with-percpu_ref.patch new file mode 100644 index 0000000..7358fdc --- /dev/null +++ b/patches.suse/block-don-t-bloat-enter_queue-with-percpu_ref.patch @@ -0,0 +1,36 @@ +From: Pavel Begunkov +Date: Tue, 19 Oct 2021 22:24:13 +0100 +Subject: [PATCH] block: don't bloat enter_queue with percpu_ref +Git-commit: 1497a51a3287959a9eb74e0432203ba3e2dc7347 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +percpu_ref_put() are inlined for performance and bloat the binary, we +don't care about the fail case of 
blk_try_enter_queue(), so we can +replace it with a call to blk_queue_exit(). + +Reviewed-by: Chaitanya Kulkarni +Reviewed-by: Christoph Hellwig +Signed-off-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/blk-core.c b/block/blk-core.c +index c1ba34777c6d..88752e51d2b6 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -404,7 +404,7 @@ static bool blk_try_enter_queue(struct request_queue *q, bool pm) + return true; + + fail_put: +- percpu_ref_put(&q->q_usage_counter); ++ blk_queue_exit(q); + fail: + rcu_read_unlock(); + return false; +-- +2.35.3 + diff --git a/patches.suse/block-don-t-bother-iter-advancing-a-fully-done-bio.patch b/patches.suse/block-don-t-bother-iter-advancing-a-fully-done-bio.patch new file mode 100644 index 0000000..f471e5b --- /dev/null +++ b/patches.suse/block-don-t-bother-iter-advancing-a-fully-done-bio.patch @@ -0,0 +1,97 @@ +From: Jens Axboe +Date: Wed, 13 Oct 2021 09:01:43 -0600 +Subject: [PATCH] block: don't bother iter advancing a fully done bio +Git-commit: d4aa57a1cac3c99ffd641f7c8e0a7aff5656de0d +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +If we're completing nbytes and nbytes is the size of the bio, don't bother +with calling into the iterator increment helpers. Just clear the bio +size and we're done. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/bio.c | 15 ++------------- + include/linux/bio.h | 24 ++++++++++++++++++++++-- + 2 files changed, 24 insertions(+), 15 deletions(-) + +diff --git a/block/bio.c b/block/bio.c +index 5fb8092577bf..4f397ba47db5 100644 +--- a/block/bio.c ++++ b/block/bio.c +@@ -1278,18 +1278,7 @@ int submit_bio_wait(struct bio *bio) + } + EXPORT_SYMBOL(submit_bio_wait); + +-/** +- * bio_advance - increment/complete a bio by some number of bytes +- * @bio: bio to advance +- * @bytes: number of bytes to complete +- * +- * This updates bi_sector, bi_size and bi_idx; if the number of bytes to +- * complete doesn't align with a bvec boundary, then bv_len and bv_offset will +- * be updated on the last bvec as well. +- * +- * @bio will then represent the remaining, uncompleted portion of the io. +- */ +-void bio_advance(struct bio *bio, unsigned bytes) ++void __bio_advance(struct bio *bio, unsigned bytes) + { + if (bio_integrity(bio)) + bio_integrity_advance(bio, bytes); +@@ -1297,7 +1286,7 @@ void bio_advance(struct bio *bio, unsigned bytes) + bio_crypt_advance(bio, bytes); + bio_advance_iter(bio, &bio->bi_iter, bytes); + } +-EXPORT_SYMBOL(bio_advance); ++EXPORT_SYMBOL(__bio_advance); + + void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, + struct bio *src, struct bvec_iter *src_iter) +diff --git a/include/linux/bio.h b/include/linux/bio.h +index 62d684b7dd4c..9538f20ffaa5 100644 +--- a/include/linux/bio.h ++++ b/include/linux/bio.h +@@ -119,6 +119,28 @@ static inline void bio_advance_iter_single(const struct bio *bio, + bvec_iter_advance_single(bio->bi_io_vec, iter, bytes); + } + ++void __bio_advance(struct bio *, unsigned bytes); ++ ++/** ++ * bio_advance - increment/complete a bio by some number of bytes ++ * @bio: bio to advance ++ * @bytes: number of bytes to complete ++ * ++ * This updates bi_sector, bi_size and bi_idx; if the number of bytes to ++ * complete doesn't align with a bvec boundary, then bv_len and bv_offset will ++ * be updated on the last bvec as well. 
++ * ++ * @bio will then represent the remaining, uncompleted portion of the io. ++ */ ++static inline void bio_advance(struct bio *bio, unsigned int nbytes) ++{ ++ if (nbytes == bio->bi_iter.bi_size) { ++ bio->bi_iter.bi_size = 0; ++ return; ++ } ++ __bio_advance(bio, nbytes); ++} ++ + #define __bio_for_each_segment(bvl, bio, iter, start) \ + for (iter = (start); \ + (iter).bi_size && \ +@@ -381,8 +403,6 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs) + struct request_queue; + + extern int submit_bio_wait(struct bio *bio); +-extern void bio_advance(struct bio *, unsigned); +- + extern void bio_init(struct bio *bio, struct bio_vec *table, + unsigned short max_vecs); + extern void bio_uninit(struct bio *); +-- +2.35.3 + diff --git a/patches.suse/block-don-t-call-blk_status_to_errno-in-blk_update_r.patch b/patches.suse/block-don-t-call-blk_status_to_errno-in-blk_update_r.patch new file mode 100644 index 0000000..71e9ffe --- /dev/null +++ b/patches.suse/block-don-t-call-blk_status_to_errno-in-blk_update_r.patch @@ -0,0 +1,99 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 10:45:18 +0200 +Subject: [PATCH] block: don't call blk_status_to_errno in blk_update_request +Git-commit: 8a7d267b4a2c71a5ff5dd9046abea7117c7d0ac2 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We only need to call it to resolve the blk_status_t -> errno mapping for +tracing, so move the conversion into the tracepoints that are not called +at all when tracing isn't enabled. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 2 +- + include/trace/events/block.h | 6 +++--- + kernel/trace/blktrace.c | 7 ++++--- + 3 files changed, 8 insertions(+), 7 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index bbc61394eef3..59809ec24303 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -680,7 +680,7 @@ bool blk_update_request(struct request *req, blk_status_t error, + { + int total_bytes; + +- trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes); ++ trace_block_rq_complete(req, error, nr_bytes); + + if (!req->bio) + return false; +diff --git a/include/trace/events/block.h b/include/trace/events/block.h +index cc5ab96a7471..a95daa4d4caa 100644 +--- a/include/trace/events/block.h ++++ b/include/trace/events/block.h +@@ -114,7 +114,7 @@ TRACE_EVENT(block_rq_requeue, + */ + TRACE_EVENT(block_rq_complete, + +- TP_PROTO(struct request *rq, int error, unsigned int nr_bytes), ++ TP_PROTO(struct request *rq, blk_status_t error, unsigned int nr_bytes), + + TP_ARGS(rq, error, nr_bytes), + +@@ -122,7 +122,7 @@ TRACE_EVENT(block_rq_complete, + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) +- __field( int, error ) ++ __field( int , error ) + __array( char, rwbs, RWBS_LEN ) + __dynamic_array( char, cmd, 1 ) + ), +@@ -131,7 +131,7 @@ TRACE_EVENT(block_rq_complete, + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_rq_pos(rq); + __entry->nr_sector = nr_bytes >> 9; +- __entry->error = error; ++ __entry->error = blk_status_to_errno(error); + + blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); + __get_str(cmd)[0] = '\0'; +diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c +index fa91f398f28b..1183c88634aa 100644 +--- a/kernel/trace/blktrace.c ++++ b/kernel/trace/blktrace.c +@@ -816,7 +816,7 @@ blk_trace_request_get_cgid(struct request *rq) + * Records an action against a request. Will log the bio offset + size. 
+ * + **/ +-static void blk_add_trace_rq(struct request *rq, int error, ++static void blk_add_trace_rq(struct request *rq, blk_status_t error, + unsigned int nr_bytes, u32 what, u64 cgid) + { + struct blk_trace *bt; +@@ -834,7 +834,8 @@ static void blk_add_trace_rq(struct request *rq, int error, + what |= BLK_TC_ACT(BLK_TC_FS); + + __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq), +- rq->cmd_flags, what, error, 0, NULL, cgid); ++ rq->cmd_flags, what, blk_status_to_errno(error), 0, ++ NULL, cgid); + rcu_read_unlock(); + } + +@@ -863,7 +864,7 @@ static void blk_add_trace_rq_requeue(void *ignore, struct request *rq) + } + + static void blk_add_trace_rq_complete(void *ignore, struct request *rq, +- int error, unsigned int nr_bytes) ++ blk_status_t error, unsigned int nr_bytes) + { + blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE, + blk_trace_request_get_cgid(rq)); +-- +2.35.3 + diff --git a/patches.suse/block-don-t-delete-queue-kobject-before-its-children.patch b/patches.suse/block-don-t-delete-queue-kobject-before-its-children.patch index a1fc986..42d488d 100644 --- a/patches.suse/block-don-t-delete-queue-kobject-before-its-children.patch +++ b/patches.suse/block-don-t-delete-queue-kobject-before-its-children.patch @@ -43,7 +43,7 @@ Acked-by: Jan Kara --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c -@@ -977,15 +977,17 @@ void blk_unregister_queue(struct gendisk +@@ -977,16 +977,18 @@ void blk_unregister_queue(struct gendisk */ if (queue_is_mq(q)) blk_mq_unregister_dev(disk_to_dev(disk), q); @@ -55,6 +55,7 @@ Acked-by: Jan Kara mutex_lock(&q->sysfs_lock); if (q->elevator) elv_unregister_queue(q); + disk_unregister_independent_access_ranges(disk); mutex_unlock(&q->sysfs_lock); + + /* Now that we've deleted all child objects, we can delete the queue. */ diff --git a/patches.suse/block-don-t-try-to-poll-multi-bio-I-Os-in-__blkdev_d.patch b/patches.suse/block-don-t-try-to-poll-multi-bio-I-Os-in-__blkdev_d.patch new file mode 100644 index 0000000..de4bc92 --- /dev/null +++ b/patches.suse/block-don-t-try-to-poll-multi-bio-I-Os-in-__blkdev_d.patch @@ -0,0 +1,93 @@ +From: Christoph Hellwig +Date: Tue, 12 Oct 2021 13:12:12 +0200 +Subject: [PATCH] block: don't try to poll multi-bio I/Os in __blkdev_direct_IO +Git-commit: 71fc3f5e2c00c966e6a2ffebadfbcc6914249d32 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +If an iocb is split into multiple bios we can't poll for both. So don't +even bother to try to poll in that case. 
+ +Signed-off-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20211012111226.760968-3-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/fops.c | 21 +++++++-------------- + 1 file changed, 7 insertions(+), 14 deletions(-) + +diff --git a/block/fops.c b/block/fops.c +index 7bb9581a146c..15324f2e5a91 100644 +--- a/block/fops.c ++++ b/block/fops.c +@@ -197,7 +197,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + struct blk_plug plug; + struct blkdev_dio *dio; + struct bio *bio; +- bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0; ++ bool do_poll = (iocb->ki_flags & IOCB_HIPRI); + bool is_read = (iov_iter_rw(iter) == READ), is_sync; + loff_t pos = iocb->ki_pos; + blk_qc_t qc = BLK_QC_T_NONE; +@@ -226,7 +226,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + * Don't plug for HIPRI/polled IO, as those should go straight + * to issue + */ +- if (!is_poll) ++ if (!(iocb->ki_flags & IOCB_HIPRI)) + blk_start_plug(&plug); + + for (;;) { +@@ -260,20 +260,13 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS); + if (!nr_pages) { +- bool polled = false; +- +- if (iocb->ki_flags & IOCB_HIPRI) { ++ if (do_poll) + bio_set_polled(bio, iocb); +- polled = true; +- } +- + qc = submit_bio(bio); +- +- if (polled) ++ if (do_poll) + WRITE_ONCE(iocb->ki_cookie, qc); + break; + } +- + if (!dio->multi_bio) { + /* + * AIO needs an extra reference to ensure the dio +@@ -284,6 +277,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + bio_get(bio); + dio->multi_bio = true; + atomic_set(&dio->ref, 2); ++ do_poll = false; + } else { + atomic_inc(&dio->ref); + } +@@ -292,7 +286,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + bio = bio_alloc(GFP_KERNEL, nr_pages); + } + +- if (!is_poll) ++ if (!(iocb->ki_flags & IOCB_HIPRI)) + blk_finish_plug(&plug); + + if (!is_sync) +@@ -303,8 +297,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + if (!READ_ONCE(dio->waiter)) + break; + +- if (!(iocb->ki_flags & IOCB_HIPRI) || +- !blk_poll(bdev_get_queue(bdev), qc, true)) ++ if (!do_poll || !blk_poll(bdev_get_queue(bdev), qc, true)) + blk_io_schedule(); + } + __set_current_state(TASK_RUNNING); +-- +2.35.3 + diff --git a/patches.suse/block-drain-queue-after-disk-is-removed-from-sysfs.patch b/patches.suse/block-drain-queue-after-disk-is-removed-from-sysfs.patch new file mode 100644 index 0000000..10f9618 --- /dev/null +++ b/patches.suse/block-drain-queue-after-disk-is-removed-from-sysfs.patch @@ -0,0 +1,70 @@ +From: Ming Lei +Date: Tue, 26 Oct 2021 18:12:04 +0800 +Subject: [PATCH] block: drain queue after disk is removed from sysfs +Git-commit: d308ae0d299a6bb15be4efb91849582d19c23213 +Patch-mainline: v5.15 +References: jsc#PED-1183 + +Before removing the disk from sysfs, userspace may still change the +queue via sysfs, such as switching the elevator or setting the wbt +latency; both may reinitialize wbt, and then the warning in +blk_free_queue_stats() will be triggered since rq_qos_exit() was moved +to del_gendisk(). + +Fix the issue by moving queue draining & teardown to after the disk is +removed from sysfs; at that point no one can come into the queue's +store()/show().
+ +Reported-by: Yi Zhang +Tested-by: Yi Zhang +Fixes: 8e141f9eb803 ("block: drain file system I/O on del_gendisk") +Signed-off-by: Ming Lei +Link: https://lore.kernel.org/r/20211026101204.2897166-1-ming.lei@redhat.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/genhd.c | 22 ++++++++++++---------- + 1 file changed, 12 insertions(+), 10 deletions(-) + +diff --git a/block/genhd.c b/block/genhd.c +index b49858550fa6..ab12ae6e636e 100644 +--- a/block/genhd.c ++++ b/block/genhd.c +@@ -588,16 +588,6 @@ void del_gendisk(struct gendisk *disk) + * Prevent new I/O from crossing bio_queue_enter(). + */ + blk_queue_start_drain(q); +- blk_mq_freeze_queue_wait(q); +- +- rq_qos_exit(q); +- blk_sync_queue(q); +- blk_flush_integrity(); +- /* +- * Allow using passthrough request again after the queue is torn down. +- */ +- blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); +- __blk_mq_unfreeze_queue(q, true); + + if (!(disk->flags & GENHD_FL_HIDDEN)) { + sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); +@@ -620,6 +610,18 @@ void del_gendisk(struct gendisk *disk) + sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); + pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); + device_del(disk_to_dev(disk)); ++ ++ blk_mq_freeze_queue_wait(q); ++ ++ rq_qos_exit(q); ++ blk_sync_queue(q); ++ blk_flush_integrity(); ++ /* ++ * Allow using passthrough request again after the queue is torn down. ++ */ ++ blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); ++ __blk_mq_unfreeze_queue(q, true); ++ + } + EXPORT_SYMBOL(del_gendisk); + +-- +2.35.3 + diff --git a/patches.suse/block-fix-incorrect-references-to-disk-objects.patch b/patches.suse/block-fix-incorrect-references-to-disk-objects.patch new file mode 100644 index 0000000..5a68222 --- /dev/null +++ b/patches.suse/block-fix-incorrect-references-to-disk-objects.patch @@ -0,0 +1,58 @@ +From: Zqiang +Date: Mon, 18 Oct 2021 18:34:22 +0800 +Subject: [PATCH] block: fix incorrect references to disk objects +Git-commit: 9fbfabfda25d8774c5a08634fdd2da000a924890 +Patch-mainline: v5.15-rc7 +References: jsc#PED-1183 + +When adding partitions to the disk, the reference count of the disk +object is increased. We then allocate the partition device and call +device_add(); if device_add() returns an error, the reference +count of the disk object is reduced twice, at put_device(pdev) +and put_disk(disk). This ends the object's life cycle +prematurely and triggers the following calltrace. + + __init_work+0x2d/0x50 kernel/workqueue.c:519 + synchronize_rcu_expedited+0x3af/0x650 kernel/rcu/tree_exp.h:847 + bdi_remove_from_list mm/backing-dev.c:938 [inline] + bdi_unregister+0x17f/0x5c0 mm/backing-dev.c:946 + release_bdi+0xa1/0xc0 mm/backing-dev.c:968 + kref_put include/linux/kref.h:65 [inline] + bdi_put+0x72/0xa0 mm/backing-dev.c:976 + bdev_free_inode+0x11e/0x220 block/bdev.c:408 + i_callback+0x3f/0x70 fs/inode.c:226 + rcu_do_batch kernel/rcu/tree.c:2508 [inline] + rcu_core+0x76d/0x16c0 kernel/rcu/tree.c:2743 + __do_softirq+0x1d7/0x93b kernel/softirq.c:558 + invoke_softirq kernel/softirq.c:432 [inline] + __irq_exit_rcu kernel/softirq.c:636 [inline] + irq_exit_rcu+0xf2/0x130 kernel/softirq.c:648 + sysvec_apic_timer_interrupt+0x93/0xc0 + +making disk NULL when calling put_disk().
+ +Reported-by: Hao Sun +Signed-off-by: Zqiang +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20211018103422.2043-1-qiang.zhang1211@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/partitions/core.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/block/partitions/core.c b/block/partitions/core.c +index 58c4c362c94f..7bea19dd9458 100644 +--- a/block/partitions/core.c ++++ b/block/partitions/core.c +@@ -423,6 +423,7 @@ static struct block_device *add_partition(struct gendisk *disk, int partno, + device_del(pdev); + out_put: + put_device(pdev); ++ return ERR_PTR(err); + out_put_disk: + put_disk(disk); + return ERR_PTR(err); +-- +2.35.3 + diff --git a/patches.suse/block-fix-req_bio_endio-append-error-handling.patch b/patches.suse/block-fix-req_bio_endio-append-error-handling.patch new file mode 100644 index 0000000..df6ef82 --- /dev/null +++ b/patches.suse/block-fix-req_bio_endio-append-error-handling.patch @@ -0,0 +1,38 @@ +From: Pavel Begunkov +Date: Fri, 22 Oct 2021 16:01:44 +0100 +Subject: [PATCH] block: fix req_bio_endio append error handling +Git-commit: 297db731847e7808881ec2123c7564067d594d39 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Shinichiro Kawasaki reports that there is a bug in a recent +req_bio_endio() patch causing problems with zonefs. As Shinichiro +suggested, inverse the condition in zone append path to resemble how it +was before: fail when it's not fully completed. + +Fixes: 478eb72b815f3 ("block: optimise req_bio_endio()") +Reported-by: Shinichiro Kawasaki +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/344ea4e334aace9148b41af5f2426da38c8aa65a.1634914228.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index d04ee72ba125..c19dfa8ea65e 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -640,7 +640,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, + * Partial zone append completions cannot be supported as the + * BIO fragments may end up not being written sequentially. 
+ */ +- if (bio->bi_iter.bi_size == nbytes) ++ if (bio->bi_iter.bi_size != nbytes) + bio->bi_status = BLK_STS_IOERR; + else + bio->bi_iter.bi_sector = rq->__sector; +-- +2.35.3 + diff --git a/patches.suse/block-fix-rq-qos-breakage-from-skipping-rq_qos_done_.patch b/patches.suse/block-fix-rq-qos-breakage-from-skipping-rq_qos_done_.patch index 3421760..0ab5f6e 100644 --- a/patches.suse/block-fix-rq-qos-breakage-from-skipping-rq_qos_done_.patch +++ b/patches.suse/block-fix-rq-qos-breakage-from-skipping-rq_qos_done_.patch @@ -97,11 +97,11 @@ Acked-by: Jan Kara return; - if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED)) -- rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio); +- rq_qos_done_bio(bdev_get_queue(bio->bi_bdev), bio); + rq_qos_done_bio(bio); if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { - trace_block_bio_complete(bio->bi_bdev->bd_disk->queue, bio); + trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio); --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -597,7 +597,7 @@ static void blkcg_iolatency_done_bio(str diff --git a/patches.suse/block-fix-too-broad-elevator-check-in-blk_mq_free_re.patch b/patches.suse/block-fix-too-broad-elevator-check-in-blk_mq_free_re.patch new file mode 100644 index 0000000..d074a0c --- /dev/null +++ b/patches.suse/block-fix-too-broad-elevator-check-in-blk_mq_free_re.patch @@ -0,0 +1,39 @@ +From: Jens Axboe +Date: Mon, 18 Oct 2021 20:54:39 -0600 +Subject: [PATCH] block: fix too broad elevator check in blk_mq_free_request() +Git-commit: e0d78afeb8d190164a823d5ef5821b0b3802af33 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We added RQF_ELV to tell whether there's an IO scheduler attached, and +RQF_ELVPRIV tells us whether there's an IO scheduler with private data +attached. Don't check RQF_ELV in blk_mq_free_request(), what we care +about here is just if we have scheduler private data attached. + +This fixes a boot crash + +Fixes: 2ff0682da6e0 ("block: store elevator state in request") +Reported-by: Yi Zhang +Reported-by: syzbot+eb8104072aeab6cc1195@syzkaller.appspotmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 9248edd8a7d3..bbc61394eef3 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -580,7 +580,7 @@ void blk_mq_free_request(struct request *rq) + struct request_queue *q = rq->q; + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; + +- if (rq->rq_flags & (RQF_ELVPRIV | RQF_ELV)) { ++ if (rq->rq_flags & RQF_ELVPRIV) { + struct elevator_queue *e = q->elevator; + + if (e->type->ops.finish_request) +-- +2.35.3 + diff --git a/patches.suse/block-get-rid-of-plug-list-sorting.patch b/patches.suse/block-get-rid-of-plug-list-sorting.patch new file mode 100644 index 0000000..0eaa2a1 --- /dev/null +++ b/patches.suse/block-get-rid-of-plug-list-sorting.patch @@ -0,0 +1,65 @@ +From: Jens Axboe +Date: Mon, 18 Oct 2021 10:08:49 -0600 +Subject: [PATCH] block: get rid of plug list sorting +Git-commit: df87eb0fce8fc891b43199447b9aeb3ea2d39bcf +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Even if we have multiple queues in the plug list, chances that they +are very interspersed is minimal. Don't bother spending CPU cycles +sorting the list. 
+ +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 19 ------------------- + 1 file changed, 19 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 335ec3a7eab7..104019c0ea41 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -19,7 +19,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -2161,20 +2160,6 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, + spin_unlock(&ctx->lock); + } + +-static int plug_rq_cmp(void *priv, const struct list_head *a, +- const struct list_head *b) +-{ +- struct request *rqa = container_of(a, struct request, queuelist); +- struct request *rqb = container_of(b, struct request, queuelist); +- +- if (rqa->mq_ctx != rqb->mq_ctx) +- return rqa->mq_ctx > rqb->mq_ctx; +- if (rqa->mq_hctx != rqb->mq_hctx) +- return rqa->mq_hctx > rqb->mq_hctx; +- +- return blk_rq_pos(rqa) > blk_rq_pos(rqb); +-} +- + void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) + { + LIST_HEAD(list); +@@ -2182,10 +2167,6 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) + if (list_empty(&plug->mq_list)) + return; + list_splice_init(&plug->mq_list, &list); +- +- if (plug->rq_count > 2 && plug->multiple_queues) +- list_sort(NULL, &list, plug_rq_cmp); +- + plug->rq_count = 0; + + do { +-- +2.35.3 + diff --git a/patches.suse/block-handle-fast-path-of-bio-splitting-inline.patch b/patches.suse/block-handle-fast-path-of-bio-splitting-inline.patch new file mode 100644 index 0000000..e2a2b59 --- /dev/null +++ b/patches.suse/block-handle-fast-path-of-bio-splitting-inline.patch @@ -0,0 +1,139 @@ +From: Jens Axboe +Date: Wed, 13 Oct 2021 12:43:41 -0600 +Subject: [PATCH] block: handle fast path of bio splitting inline +Git-commit: abd45c159df5fcb7ac820e2825dac85de7c01c21 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +The fast path is no splitting needed. Separate the handling into a +check part we can inline, and an out-of-line handling path if we do +need to split. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-merge.c | 24 ++++++------------------ + block/blk-mq.c | 5 +++-- + block/blk.h | 27 ++++++++++++++++++++++++++- + 3 files changed, 35 insertions(+), 21 deletions(-) + +diff --git a/block/blk-merge.c b/block/blk-merge.c +index f88d7863f997..ec727234ac48 100644 +--- a/block/blk-merge.c ++++ b/block/blk-merge.c +@@ -324,6 +324,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, + + /** + * __blk_queue_split - split a bio and submit the second half ++ * @q: [in] request_queue new bio is being queued at + * @bio: [in, out] bio to be split + * @nr_segs: [out] number of segments in the first bio + * +@@ -334,9 +335,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, + * of the caller to ensure that q->bio_split is only released after processing + * of the split bio has finished. + */ +-void __blk_queue_split(struct bio **bio, unsigned int *nr_segs) ++void __blk_queue_split(struct request_queue *q, struct bio **bio, ++ unsigned int *nr_segs) + { +- struct request_queue *q = (*bio)->bi_bdev->bd_disk->queue; + struct bio *split = NULL; + + switch (bio_op(*bio)) { +@@ -353,21 +354,6 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs) + nr_segs); + break; + default: +- /* +- * All drivers must accept single-segments bios that are <= +- * PAGE_SIZE. 
This is a quick and dirty check that relies on +- * the fact that bi_io_vec[0] is always valid if a bio has data. +- * The check might lead to occasional false negatives when bios +- * are cloned, but compared to the performance impact of cloned +- * bios themselves the loop below doesn't matter anyway. +- */ +- if (!q->limits.chunk_sectors && +- (*bio)->bi_vcnt == 1 && +- ((*bio)->bi_io_vec[0].bv_len + +- (*bio)->bi_io_vec[0].bv_offset) <= PAGE_SIZE) { +- *nr_segs = 1; +- break; +- } + split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs); + break; + } +@@ -397,9 +383,11 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs) + */ + void blk_queue_split(struct bio **bio) + { ++ struct request_queue *q = (*bio)->bi_bdev->bd_disk->queue; + unsigned int nr_segs; + +- __blk_queue_split(bio, &nr_segs); ++ if (blk_may_split(q, *bio)) ++ __blk_queue_split(q, bio, &nr_segs); + } + EXPORT_SYMBOL(blk_queue_split); + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 0860f622099f..09219080855f 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2259,11 +2259,12 @@ void blk_mq_submit_bio(struct bio *bio) + struct request *rq; + struct blk_plug *plug; + struct request *same_queue_rq = NULL; +- unsigned int nr_segs; ++ unsigned int nr_segs = 1; + blk_status_t ret; + + blk_queue_bounce(q, &bio); +- __blk_queue_split(&bio, &nr_segs); ++ if (blk_may_split(q, bio)) ++ __blk_queue_split(q, &bio, &nr_segs); + + if (!bio_integrity_prep(bio)) + goto queue_exit; +diff --git a/block/blk.h b/block/blk.h +index fa05d3f07976..447a2defe2c8 100644 +--- a/block/blk.h ++++ b/block/blk.h +@@ -266,7 +266,32 @@ ssize_t part_timeout_show(struct device *, struct device_attribute *, char *); + ssize_t part_timeout_store(struct device *, struct device_attribute *, + const char *, size_t); + +-void __blk_queue_split(struct bio **bio, unsigned int *nr_segs); ++static inline bool blk_may_split(struct request_queue *q, struct bio *bio) ++{ ++ switch (bio_op(bio)) { ++ case REQ_OP_DISCARD: ++ case REQ_OP_SECURE_ERASE: ++ case REQ_OP_WRITE_ZEROES: ++ case REQ_OP_WRITE_SAME: ++ return true; /* non-trivial splitting decisions */ ++ default: ++ break; ++ } ++ ++ /* ++ * All drivers must accept single-segments bios that are <= PAGE_SIZE. ++ * This is a quick and dirty check that relies on the fact that ++ * bi_io_vec[0] is always valid if a bio has data. The check might ++ * lead to occasional false negatives when bios are cloned, but compared ++ * to the performance impact of cloned bios themselves the loop below ++ * doesn't matter anyway. 
++ */ ++ return q->limits.chunk_sectors || bio->bi_vcnt != 1 || ++ bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE; ++} ++ ++void __blk_queue_split(struct request_queue *q, struct bio **bio, ++ unsigned int *nr_segs); + int ll_back_merge_fn(struct request *req, struct bio *bio, + unsigned int nr_segs); + bool blk_attempt_req_merge(struct request_queue *q, struct request *rq, +-- +2.35.3 + diff --git a/patches.suse/block-improve-layout-of-struct-request.patch b/patches.suse/block-improve-layout-of-struct-request.patch new file mode 100644 index 0000000..89fd35d --- /dev/null +++ b/patches.suse/block-improve-layout-of-struct-request.patch @@ -0,0 +1,139 @@ +From: Jens Axboe +Date: Fri, 15 Oct 2021 15:03:52 -0600 +Subject: [PATCH] block: improve layout of struct request +Git-commit: b60876296847e6cd7f1da4b8b7f0f31399d59aa1 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +It's been a while since this was analyzed, move some members around to +better flow with the use case. Initial state up top, and queued state +after that. This improves my peak case by about 1.5%, from 7750K to +7900K IOPS. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + include/linux/blk-mq.h | 90 +++++++++++++++++++++--------------------- + 1 file changed, 46 insertions(+), 44 deletions(-) + +diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h +index a9c1d0882550..8ca9728cc7f2 100644 +--- a/include/linux/blk-mq.h ++++ b/include/linux/blk-mq.h +@@ -83,6 +83,8 @@ struct request { + int tag; + int internal_tag; + ++ unsigned int timeout; ++ + /* the following two fields are internal, NEVER access directly */ + unsigned int __data_len; /* total data len */ + sector_t __sector; /* sector cursor */ +@@ -95,49 +97,6 @@ struct request { + struct request *rq_next; + }; + +- /* +- * The hash is used inside the scheduler, and killed once the +- * request reaches the dispatch list. The ipi_list is only used +- * to queue the request for softirq completion, which is long +- * after the request has been unhashed (and even removed from +- * the dispatch list). +- */ +- union { +- struct hlist_node hash; /* merge hash */ +- struct llist_node ipi_list; +- }; +- +- /* +- * The rb_node is only used inside the io scheduler, requests +- * are pruned when moved to the dispatch queue. So let the +- * completion_data share space with the rb_node. +- */ +- union { +- struct rb_node rb_node; /* sort/lookup */ +- struct bio_vec special_vec; +- void *completion_data; +- int error_count; /* for legacy drivers, don't use */ +- }; +- +- /* +- * Three pointers are available for the IO schedulers, if they need +- * more they have to dynamically allocate it. Flush requests are +- * never put on the IO scheduler. So let the flush fields share +- * space with the elevator data. +- */ +- union { +- struct { +- struct io_cq *icq; +- void *priv[2]; +- } elv; +- +- struct { +- unsigned int seq; +- struct list_head list; +- rq_end_io_fn *saved_end_io; +- } flush; +- }; +- + struct gendisk *rq_disk; + struct block_device *part; + #ifdef CONFIG_BLK_RQ_ALLOC_TIME +@@ -180,9 +139,52 @@ struct request { + enum mq_rq_state state; + refcount_t ref; + +- unsigned int timeout; + unsigned long deadline; + ++ /* ++ * The hash is used inside the scheduler, and killed once the ++ * request reaches the dispatch list. The ipi_list is only used ++ * to queue the request for softirq completion, which is long ++ * after the request has been unhashed (and even removed from ++ * the dispatch list). 
++ */ ++ union { ++ struct hlist_node hash; /* merge hash */ ++ struct llist_node ipi_list; ++ }; ++ ++ /* ++ * The rb_node is only used inside the io scheduler, requests ++ * are pruned when moved to the dispatch queue. So let the ++ * completion_data share space with the rb_node. ++ */ ++ union { ++ struct rb_node rb_node; /* sort/lookup */ ++ struct bio_vec special_vec; ++ void *completion_data; ++ int error_count; /* for legacy drivers, don't use */ ++ }; ++ ++ ++ /* ++ * Three pointers are available for the IO schedulers, if they need ++ * more they have to dynamically allocate it. Flush requests are ++ * never put on the IO scheduler. So let the flush fields share ++ * space with the elevator data. ++ */ ++ union { ++ struct { ++ struct io_cq *icq; ++ void *priv[2]; ++ } elv; ++ ++ struct { ++ unsigned int seq; ++ struct list_head list; ++ rq_end_io_fn *saved_end_io; ++ } flush; ++ }; ++ + union { + struct __call_single_data csd; + u64 fifo_time; +-- +2.35.3 + diff --git a/patches.suse/block-improve-readability-of-blk_mq_end_request_batc.patch b/patches.suse/block-improve-readability-of-blk_mq_end_request_batc.patch new file mode 100644 index 0000000..4349ac2 --- /dev/null +++ b/patches.suse/block-improve-readability-of-blk_mq_end_request_batc.patch @@ -0,0 +1,56 @@ +From: Jens Axboe +Date: Thu, 28 Oct 2021 12:08:34 -0600 +Subject: [PATCH] block: improve readability of blk_mq_end_request_batch() +Git-commit: 02f7eab0095a47b45f48a4321d33de3569c59061 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +It's faster and easier to read if we tolerate cur_hctx being NULL in +the "when to flush" condition. Rename last_hctx to cur_hctx while at it, +as it better describes the role of that variable. + +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index ec966e0b172d..221d1b7d10d6 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -822,7 +822,7 @@ static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx, + void blk_mq_end_request_batch(struct io_comp_batch *iob) + { + int tags[TAG_COMP_BATCH], nr_tags = 0; +- struct blk_mq_hw_ctx *last_hctx = NULL; ++ struct blk_mq_hw_ctx *cur_hctx = NULL; + struct request *rq; + u64 now = 0; + +@@ -845,17 +845,17 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob) + blk_pm_mark_last_busy(rq); + rq_qos_done(rq->q, rq); + +- if (nr_tags == TAG_COMP_BATCH || +- (last_hctx && last_hctx != rq->mq_hctx)) { +- blk_mq_flush_tag_batch(last_hctx, tags, nr_tags); ++ if (nr_tags == TAG_COMP_BATCH || cur_hctx != rq->mq_hctx) { ++ if (cur_hctx) ++ blk_mq_flush_tag_batch(cur_hctx, tags, nr_tags); + nr_tags = 0; ++ cur_hctx = rq->mq_hctx; + } + tags[nr_tags++] = rq->tag; +- last_hctx = rq->mq_hctx; + } + + if (nr_tags) +- blk_mq_flush_tag_batch(last_hctx, tags, nr_tags); ++ blk_mq_flush_tag_batch(cur_hctx, tags, nr_tags); + } + EXPORT_SYMBOL_GPL(blk_mq_end_request_batch); + +-- +2.35.3 + diff --git a/patches.suse/block-inline-a-part-of-bio_release_pages.patch b/patches.suse/block-inline-a-part-of-bio_release_pages.patch new file mode 100644 index 0000000..0bbed35 --- /dev/null +++ b/patches.suse/block-inline-a-part-of-bio_release_pages.patch @@ -0,0 +1,75 @@ +From: Pavel Begunkov +Date: Tue, 19 Oct 2021 22:24:14 +0100 +Subject: [PATCH] block: inline a part of bio_release_pages() +Git-commit: c809084ab033a8d4ee404e2ac3c5d3dc80cb65f7 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Inline 
BIO_NO_PAGE_REF check of bio_release_pages() to avoid function +call. + +Signed-off-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/bio.c | 7 ++----- + include/linux/bio.h | 8 +++++++- + 2 files changed, 9 insertions(+), 6 deletions(-) + +diff --git a/block/bio.c b/block/bio.c +index 4f397ba47db5..46a87c72d2b4 100644 +--- a/block/bio.c ++++ b/block/bio.c +@@ -1033,21 +1033,18 @@ int bio_add_page(struct bio *bio, struct page *page, + } + EXPORT_SYMBOL(bio_add_page); + +-void bio_release_pages(struct bio *bio, bool mark_dirty) ++void __bio_release_pages(struct bio *bio, bool mark_dirty) + { + struct bvec_iter_all iter_all; + struct bio_vec *bvec; + +- if (bio_flagged(bio, BIO_NO_PAGE_REF)) +- return; +- + bio_for_each_segment_all(bvec, bio, iter_all) { + if (mark_dirty && !PageCompound(bvec->bv_page)) + set_page_dirty_lock(bvec->bv_page); + put_page(bvec->bv_page); + } + } +-EXPORT_SYMBOL_GPL(bio_release_pages); ++EXPORT_SYMBOL_GPL(__bio_release_pages); + + static void __bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter) + { +diff --git a/include/linux/bio.h b/include/linux/bio.h +index b12453d7b8a8..c88700d1bdc3 100644 +--- a/include/linux/bio.h ++++ b/include/linux/bio.h +@@ -417,7 +417,7 @@ int bio_add_zone_append_page(struct bio *bio, struct page *page, + void __bio_add_page(struct bio *bio, struct page *page, + unsigned int len, unsigned int off); + int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); +-void bio_release_pages(struct bio *bio, bool mark_dirty); ++void __bio_release_pages(struct bio *bio, bool mark_dirty); + extern void bio_set_pages_dirty(struct bio *bio); + extern void bio_check_pages_dirty(struct bio *bio); + +@@ -428,6 +428,12 @@ extern void bio_free_pages(struct bio *bio); + void guard_bio_eod(struct bio *bio); + void zero_fill_bio(struct bio *bio); + ++static inline void bio_release_pages(struct bio *bio, bool mark_dirty) ++{ ++ if (!bio_flagged(bio, BIO_NO_PAGE_REF)) ++ __bio_release_pages(bio, mark_dirty); ++} ++ + extern const char *bio_devname(struct bio *bio, char *buffer); + + #define bio_dev(bio) \ +-- +2.35.3 + diff --git a/patches.suse/block-inline-fast-path-of-driver-tag-allocation.patch b/patches.suse/block-inline-fast-path-of-driver-tag-allocation.patch new file mode 100644 index 0000000..9a55e14 --- /dev/null +++ b/patches.suse/block-inline-fast-path-of-driver-tag-allocation.patch @@ -0,0 +1,74 @@ +From: Jens Axboe +Date: Wed, 13 Oct 2021 08:28:14 -0600 +Subject: [PATCH] block: inline fast path of driver tag allocation +Git-commit: a808a9d545cdffb964f27239d1fc0c6e2330b424 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +If we don't use an IO scheduler or have shared tags, then we don't need +to call into this external function at all. This saves ~2% for such +a setup. 
+ +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 8 +++----- + block/blk-mq.h | 15 ++++++++++++++- + 2 files changed, 17 insertions(+), 6 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 3481a8712234..bf5936d72de8 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -1324,7 +1324,7 @@ struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, + return data.rq; + } + +-static bool __blk_mq_get_driver_tag(struct request *rq) ++static bool __blk_mq_alloc_driver_tag(struct request *rq) + { + struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; + unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; +@@ -1348,11 +1348,9 @@ static bool __blk_mq_get_driver_tag(struct request *rq) + return true; + } + +-bool blk_mq_get_driver_tag(struct request *rq) ++bool __blk_mq_get_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq) + { +- struct blk_mq_hw_ctx *hctx = rq->mq_hctx; +- +- if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq)) ++ if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq)) + return false; + + if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) && +diff --git a/block/blk-mq.h b/block/blk-mq.h +index ebf67f4d4f2e..d8ccb341e82e 100644 +--- a/block/blk-mq.h ++++ b/block/blk-mq.h +@@ -258,7 +258,20 @@ static inline void blk_mq_put_driver_tag(struct request *rq) + __blk_mq_put_driver_tag(rq->mq_hctx, rq); + } + +-bool blk_mq_get_driver_tag(struct request *rq); ++bool __blk_mq_get_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq); ++ ++static inline bool blk_mq_get_driver_tag(struct request *rq) ++{ ++ struct blk_mq_hw_ctx *hctx = rq->mq_hctx; ++ ++ if (rq->tag != BLK_MQ_NO_TAG && ++ !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) { ++ hctx->tags->rqs[rq->tag] = rq; ++ return true; ++ } ++ ++ return __blk_mq_get_driver_tag(hctx, rq); ++} + + static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap) + { +-- +2.35.3 + diff --git a/patches.suse/block-ioctl-use-bdev_nr_sectors-and-bdev_nr_bytes.patch b/patches.suse/block-ioctl-use-bdev_nr_sectors-and-bdev_nr_bytes.patch new file mode 100644 index 0000000..5834dbf --- /dev/null +++ b/patches.suse/block-ioctl-use-bdev_nr_sectors-and-bdev_nr_bytes.patch @@ -0,0 +1,102 @@ +From: Christoph Hellwig +Date: Tue, 19 Oct 2021 08:20:22 +0200 +Subject: [PATCH] block/ioctl: use bdev_nr_sectors and bdev_nr_bytes +Git-commit: 946e99373037be4841e8b42dcd136e03093c9fd5 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. 
+ +Signed-off-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20211019062024.2171074-2-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/ioctl.c | 20 ++++++++------------ + 1 file changed, 8 insertions(+), 12 deletions(-) + +diff --git a/block/ioctl.c b/block/ioctl.c +index 77b1b2453f39..d6af0ac97e57 100644 +--- a/block/ioctl.c ++++ b/block/ioctl.c +@@ -132,7 +132,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, + if (len & 511) + return -EINVAL; + +- if (start + len > i_size_read(bdev->bd_inode)) ++ if (start + len > bdev_nr_bytes(bdev)) + return -EINVAL; + + err = truncate_bdev_range(bdev, mode, start, start + len - 1); +@@ -164,7 +164,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode, + return -EINVAL; + if (len & 511) + return -EINVAL; +- if (end >= (uint64_t)i_size_read(bdev->bd_inode)) ++ if (end >= (uint64_t)bdev_nr_bytes(bdev)) + return -EINVAL; + if (end < start) + return -EINVAL; +@@ -543,7 +543,6 @@ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) + struct block_device *bdev = I_BDEV(file->f_mapping->host); + void __user *argp = (void __user *)arg; + fmode_t mode = file->f_mode; +- loff_t size; + int ret; + + /* +@@ -570,10 +569,9 @@ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) + return put_long(argp, + (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); + case BLKGETSIZE: +- size = i_size_read(bdev->bd_inode); +- if ((size >> 9) > ~0UL) ++ if (bdev_nr_sectors(bdev) > ~0UL) + return -EFBIG; +- return put_ulong(argp, size >> 9); ++ return put_ulong(argp, bdev_nr_sectors(bdev)); + + /* The data is compatible, but the command number is different */ + case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ +@@ -581,7 +579,7 @@ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) + case BLKBSZSET: + return blkdev_bszset(bdev, mode, argp); + case BLKGETSIZE64: +- return put_u64(argp, i_size_read(bdev->bd_inode)); ++ return put_u64(argp, bdev_nr_bytes(bdev)); + + /* Incompatible alignment on i386 */ + case BLKTRACESETUP: +@@ -615,7 +613,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) + struct block_device *bdev = I_BDEV(file->f_mapping->host); + struct gendisk *disk = bdev->bd_disk; + fmode_t mode = file->f_mode; +- loff_t size; + + /* + * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have +@@ -641,10 +638,9 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) + return compat_put_long(argp, + (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); + case BLKGETSIZE: +- size = i_size_read(bdev->bd_inode); +- if ((size >> 9) > ~0UL) ++ if (bdev_nr_sectors(bdev) > ~0UL) + return -EFBIG; +- return compat_put_ulong(argp, size >> 9); ++ return compat_put_ulong(argp, bdev_nr_sectors(bdev)); + + /* The data is compatible, but the command number is different */ + case BLKBSZGET_32: /* get the logical block size (cf. 
BLKSSZGET) */ +@@ -652,7 +648,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) + case BLKBSZSET_32: + return blkdev_bszset(bdev, mode, argp); + case BLKGETSIZE64_32: +- return put_u64(argp, i_size_read(bdev->bd_inode)); ++ return put_u64(argp, bdev_nr_bytes(bdev)); + + /* Incompatible alignment on i386 */ + case BLKTRACESETUP32: +-- +2.35.3 + diff --git a/patches.suse/block-kill-DIO_MULTI_BIO.patch b/patches.suse/block-kill-DIO_MULTI_BIO.patch new file mode 100644 index 0000000..05f3482 --- /dev/null +++ b/patches.suse/block-kill-DIO_MULTI_BIO.patch @@ -0,0 +1,97 @@ +From: Pavel Begunkov +Date: Wed, 27 Oct 2021 13:21:09 +0100 +Subject: [PATCH] block: kill DIO_MULTI_BIO +Git-commit: e71aa913e26543768d5acaef50abe14913c6c496 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Now __blkdev_direct_IO() serves only multi-bio I/O, so remove the +single-bio refcounting optimisations that are no longer used. + +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/88eb488aae9ed4852a30f3a7132f296f56e43b80.1635337135.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/fops.c | 33 ++++++++++++--------------------- + 1 file changed, 12 insertions(+), 21 deletions(-) + +diff --git a/block/fops.c b/block/fops.c +index 983e993c9a4b..8594852bd344 100644 +--- a/block/fops.c ++++ b/block/fops.c +@@ -124,9 +124,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, + } + + enum { +- DIO_MULTI_BIO = 1, +- DIO_SHOULD_DIRTY = 2, +- DIO_IS_SYNC = 4, ++ DIO_SHOULD_DIRTY = 1, ++ DIO_IS_SYNC = 2, + }; + + struct blkdev_dio { +@@ -150,7 +149,7 @@ static void blkdev_bio_end_io(struct bio *bio) + if (bio->bi_status && !dio->bio.bi_status) + dio->bio.bi_status = bio->bi_status; + +- if (!(dio->flags & DIO_MULTI_BIO) || atomic_dec_and_test(&dio->ref)) { ++ if (atomic_dec_and_test(&dio->ref)) { + if (!(dio->flags & DIO_IS_SYNC)) { + struct kiocb *iocb = dio->iocb; + ssize_t ret; +@@ -165,8 +164,7 @@ static void blkdev_bio_end_io(struct bio *bio) + } + + dio->iocb->ki_complete(iocb, ret, 0); +- if (dio->flags & DIO_MULTI_BIO) +- bio_put(&dio->bio); ++ bio_put(&dio->bio); + } else { + struct task_struct *waiter = dio->waiter; + +@@ -201,11 +199,17 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool); + + dio = container_of(bio, struct blkdev_dio, bio); ++ atomic_set(&dio->ref, 1); ++ /* ++ * Grab an extra reference to ensure the dio structure which is embedded ++ * into the first bio stays around. ++ */ ++ bio_get(bio); ++ + is_sync = is_sync_kiocb(iocb); + if (is_sync) { + dio->flags = DIO_IS_SYNC; + dio->waiter = current; +- bio_get(bio); + } else { + dio->flags = 0; + dio->iocb = iocb; +@@ -251,20 +255,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + submit_bio(bio); + break; + } +- if (!(dio->flags & DIO_MULTI_BIO)) { +- /* +- * AIO needs an extra reference to ensure the dio +- * structure which is embedded into the first bio +- * stays around.
+- */ +- if (!is_sync) +- bio_get(bio); +- dio->flags |= DIO_MULTI_BIO; +- atomic_set(&dio->ref, 2); +- } else { +- atomic_inc(&dio->ref); +- } +- ++ atomic_inc(&dio->ref); + submit_bio(bio); + bio = bio_alloc(GFP_KERNEL, nr_pages); + } +-- +2.35.3 + diff --git a/patches.suse/block-kill-extra-rcu-lock-unlock-in-queue-enter.patch b/patches.suse/block-kill-extra-rcu-lock-unlock-in-queue-enter.patch new file mode 100644 index 0000000..6422671 --- /dev/null +++ b/patches.suse/block-kill-extra-rcu-lock-unlock-in-queue-enter.patch @@ -0,0 +1,45 @@ +From: Pavel Begunkov +Date: Thu, 21 Oct 2021 14:30:52 +0100 +Subject: [PATCH] block: kill extra rcu lock/unlock in queue enter +Git-commit: e94f68527a35271131cdf9d3fb4eb3c2513dc3d0 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +blk_try_enter_queue() already takes rcu_read_lock/unlock, so we can +avoid the second pair in percpu_ref_tryget_live() by using the newly +added percpu_ref_tryget_live_rcu(). + +As rcu_read_lock/unlock imply barrier()s, it's pretty noticeable, +especially for !CONFIG_PREEMPT_RCU (default for some distributions), +where __rcu_read_lock/unlock() are not inlined. + +3.20% io_uring [kernel.vmlinux] [k] __rcu_read_unlock +3.05% io_uring [kernel.vmlinux] [k] __rcu_read_lock + +2.52% io_uring [kernel.vmlinux] [k] __rcu_read_unlock +2.28% io_uring [kernel.vmlinux] [k] __rcu_read_lock + +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/6b11c67ea495ed9d44f067622d852de4a510ce65.1634822969.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/blk-core.c b/block/blk-core.c +index dfa199312c2f..fd389a16013c 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -389,7 +389,7 @@ EXPORT_SYMBOL(blk_cleanup_queue); + static bool blk_try_enter_queue(struct request_queue *q, bool pm) + { + rcu_read_lock(); +- if (!percpu_ref_tryget_live(&q->q_usage_counter)) ++ if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter)) + goto fail; + + /* +-- +2.35.3 + diff --git a/patches.suse/block-kill-unused-polling-bits-in-__blkdev_direct_IO.patch b/patches.suse/block-kill-unused-polling-bits-in-__blkdev_direct_IO.patch new file mode 100644 index 0000000..432147d --- /dev/null +++ b/patches.suse/block-kill-unused-polling-bits-in-__blkdev_direct_IO.patch @@ -0,0 +1,89 @@ +From: Pavel Begunkov +Date: Wed, 27 Oct 2021 13:21:08 +0100 +Subject: [PATCH] block: kill unused polling bits in __blkdev_direct_IO() +Git-commit: 25d207dc22271c2232df2d610ce4be6e125d1de8 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +With the addition of __blkdev_direct_IO_async(), __blkdev_direct_IO() now +serves only multi-bio I/O, which we don't poll. Now we can remove +anything related to I/O polling from it.
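+
+For orientation, this is roughly how the direct-I/O entry point in
+block/fops.c splits the work after this series. The sketch matches the
+v5.16 shape of the code but is illustrative, not text from the patch:
+
+	static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+	{
+		unsigned int nr_pages;
+
+		if (!iov_iter_count(iter))
+			return 0;
+
+		nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
+		if (likely(nr_pages <= BIO_MAX_VECS)) {
+			/* single-bio paths: the only ones that can poll */
+			if (is_sync_kiocb(iocb))
+				return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
+			return __blkdev_direct_IO_async(iocb, iter, nr_pages);
+		}
+		/* multi-bio path: never polled, so the bits below can go */
+		return __blkdev_direct_IO(iocb, iter,
+					  bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS));
+	}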
+ +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/b8c597a6b7ee612df394853bfd24726aee5b898e.1635337135.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/fops.c | 20 +++----------------- + 1 file changed, 3 insertions(+), 17 deletions(-) + +diff --git a/block/fops.c b/block/fops.c +index 092e5079e827..983e993c9a4b 100644 +--- a/block/fops.c ++++ b/block/fops.c +@@ -190,7 +190,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + struct blk_plug plug; + struct blkdev_dio *dio; + struct bio *bio; +- bool do_poll = (iocb->ki_flags & IOCB_HIPRI); + bool is_read = (iov_iter_rw(iter) == READ), is_sync; + loff_t pos = iocb->ki_pos; + int ret = 0; +@@ -216,12 +215,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + if (is_read && iter_is_iovec(iter)) + dio->flags |= DIO_SHOULD_DIRTY; + +- /* +- * Don't plug for HIPRI/polled IO, as those should go straight +- * to issue +- */ +- if (!(iocb->ki_flags & IOCB_HIPRI)) +- blk_start_plug(&plug); ++ blk_start_plug(&plug); + + for (;;) { + bio_set_dev(bio, bdev); +@@ -254,11 +248,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS); + if (!nr_pages) { +- if (do_poll) +- bio_set_polled(bio, iocb); + submit_bio(bio); +- if (do_poll) +- WRITE_ONCE(iocb->private, bio); + break; + } + if (!(dio->flags & DIO_MULTI_BIO)) { +@@ -271,7 +261,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + bio_get(bio); + dio->flags |= DIO_MULTI_BIO; + atomic_set(&dio->ref, 2); +- do_poll = false; + } else { + atomic_inc(&dio->ref); + } +@@ -280,8 +269,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + bio = bio_alloc(GFP_KERNEL, nr_pages); + } + +- if (!(iocb->ki_flags & IOCB_HIPRI)) +- blk_finish_plug(&plug); ++ blk_finish_plug(&plug); + + if (!is_sync) + return -EIOCBQUEUED; +@@ -290,9 +278,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + set_current_state(TASK_UNINTERRUPTIBLE); + if (!READ_ONCE(dio->waiter)) + break; +- +- if (!do_poll || !bio_poll(bio, NULL, 0)) +- blk_io_schedule(); ++ blk_io_schedule(); + } + __set_current_state(TASK_RUNNING); + +-- +2.35.3 + diff --git a/patches.suse/block-move-CONFIG_BLOCK-guard-to-top-Makefile.patch b/patches.suse/block-move-CONFIG_BLOCK-guard-to-top-Makefile.patch new file mode 100644 index 0000000..2ae09a1 --- /dev/null +++ b/patches.suse/block-move-CONFIG_BLOCK-guard-to-top-Makefile.patch @@ -0,0 +1,52 @@ +From: Masahiro Yamada +Date: Mon, 27 Sep 2021 23:00:00 +0900 +Subject: [PATCH] block: move CONFIG_BLOCK guard to top Makefile +Git-commit: 4c928904ff771a8e830773b71a080047365324a5 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Every object under block/ depends on CONFIG_BLOCK. + +Move the guard to the top Makefile since there is no point to +descend into block/ if CONFIG_BLOCK=n. 
Signed-off-by: Masahiro Yamada +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20210927140000.866249-5-masahiroy@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + Makefile | 3 ++- + block/Makefile | 2 +- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/Makefile b/Makefile +index 91297670da8e..95ba2c34dc33 100644 +--- a/Makefile ++++ b/Makefile +@@ -1115,7 +1115,8 @@ export MODORDER := $(extmod_prefix)modules.order + export MODULES_NSDEPS := $(extmod_prefix)modules.nsdeps + + ifeq ($(KBUILD_EXTMOD),) +-core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ ++core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ ++core-$(CONFIG_BLOCK) += block/ + + vmlinux-dirs := $(patsubst %/,%,$(filter %/, \ + $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ +diff --git a/block/Makefile b/block/Makefile +index 41aa1ba69c90..74df168729ec 100644 +--- a/block/Makefile ++++ b/block/Makefile +@@ -3,7 +3,7 @@ + # Makefile for the kernel block layer + # + +-obj-$(CONFIG_BLOCK) := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \ ++obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \ + blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ + blk-exec.o blk-merge.o blk-timeout.o \ + blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ +-- +2.35.3 + diff --git a/patches.suse/block-move-bdev_read_only-into-the-header.patch b/patches.suse/block-move-bdev_read_only-into-the-header.patch new file mode 100644 index 0000000..0f92b82 --- /dev/null +++ b/patches.suse/block-move-bdev_read_only-into-the-header.patch @@ -0,0 +1,55 @@ +From: Jens Axboe +Date: Wed, 6 Oct 2021 06:15:04 -0600 +Subject: [PATCH] block: move bdev_read_only() into the header +Git-commit: db9a02baa23267c695a44234a0f2f4607992780e +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +This is called for every write in the fast path; move it inline next +to get_disk_ro(), which is called internally.
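+
+A typical fast-path caller (hypothetical fragment, not taken from this
+patch) then compiles down to a couple of flag tests with no out-of-line
+call or module-export indirection:
+
+	/* e.g. at the top of a write path; bdev is the target device */
+	if (bdev_read_only(bdev))
+		return -EPERM;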
+ +Reviewed-by: Christoph Hellwig +Reviewed-by: Chaitanya Kulkarni +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/genhd.c | 6 ------ + include/linux/genhd.h | 5 +++++ + 2 files changed, 5 insertions(+), 6 deletions(-) + +diff --git a/block/genhd.c b/block/genhd.c +index 759bc06810f8..80943c123c3e 100644 +--- a/block/genhd.c ++++ b/block/genhd.c +@@ -1389,12 +1389,6 @@ void set_disk_ro(struct gendisk *disk, bool read_only) + } + EXPORT_SYMBOL(set_disk_ro); + +-int bdev_read_only(struct block_device *bdev) +-{ +- return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); +-} +-EXPORT_SYMBOL(bdev_read_only); +- + void inc_diskseq(struct gendisk *disk) + { + disk->diskseq = atomic64_inc_return(&diskseq); +diff --git a/include/linux/genhd.h b/include/linux/genhd.h +index cd4038fd5743..c70bc5fce4db 100644 +--- a/include/linux/genhd.h ++++ b/include/linux/genhd.h +@@ -221,6 +221,11 @@ static inline int get_disk_ro(struct gendisk *disk) + test_bit(GD_READ_ONLY, &disk->state); + } + ++static inline int bdev_read_only(struct block_device *bdev) ++{ ++ return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); ++} ++ + extern void disk_block_events(struct gendisk *disk); + extern void disk_unblock_events(struct gendisk *disk); + extern void disk_flush_events(struct gendisk *disk, unsigned int mask); +-- +2.35.3 + diff --git a/patches.suse/block-move-blk_mq_tag_to_rq-inline.patch b/patches.suse/block-move-blk_mq_tag_to_rq-inline.patch new file mode 100644 index 0000000..c4f349a --- /dev/null +++ b/patches.suse/block-move-blk_mq_tag_to_rq-inline.patch @@ -0,0 +1,135 @@ +From: Jens Axboe +Date: Sat, 16 Oct 2021 16:38:14 -0600 +Subject: [PATCH] block: move blk_mq_tag_to_rq() inline +Git-commit: e028f167eca5fc56938e6c1680d40eed0bc39e80 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +This is in the fast path of driver issue or completion, and it's a single +array index operation. Move it inline to avoid a function call for it. + +This does mean making struct blk_mq_tags block layer public, but there's +not really much in there. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq-tag.h | 23 ----------------------- + block/blk-mq.c | 11 ----------- + include/linux/blk-mq.h | 36 +++++++++++++++++++++++++++++++++++- + 3 files changed, 35 insertions(+), 35 deletions(-) + +diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h +index 78ae2fb8e2a4..df787b5a23bd 100644 +--- a/block/blk-mq-tag.h ++++ b/block/blk-mq-tag.h +@@ -4,29 +4,6 @@ + + struct blk_mq_alloc_data; + +-/* +- * Tag address space map. 
+- */ +-struct blk_mq_tags { +- unsigned int nr_tags; +- unsigned int nr_reserved_tags; +- +- atomic_t active_queues; +- +- struct sbitmap_queue bitmap_tags; +- struct sbitmap_queue breserved_tags; +- +- struct request **rqs; +- struct request **static_rqs; +- struct list_head page_list; +- +- /* +- * used to clear request reference in rqs[] before freeing one +- * request pool +- */ +- spinlock_t lock; +-}; +- + extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, + unsigned int reserved_tags, + int node, int alloc_policy); +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 104019c0ea41..8f5c1662335b 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -1120,17 +1120,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, + } + EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); + +-struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) +-{ +- if (tag < tags->nr_tags) { +- prefetch(tags->rqs[tag]); +- return tags->rqs[tag]; +- } +- +- return NULL; +-} +-EXPORT_SYMBOL(blk_mq_tag_to_rq); +- + static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq, + void *priv, bool reserved) + { +diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h +index 656fe34bdb6c..6cf35de151a9 100644 +--- a/include/linux/blk-mq.h ++++ b/include/linux/blk-mq.h +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + + struct blk_mq_tags; + struct blk_flush_queue; +@@ -675,7 +676,40 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, + struct request *blk_mq_alloc_request_hctx(struct request_queue *q, + unsigned int op, blk_mq_req_flags_t flags, + unsigned int hctx_idx); +-struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); ++ ++/* ++ * Tag address space map. ++ */ ++struct blk_mq_tags { ++ unsigned int nr_tags; ++ unsigned int nr_reserved_tags; ++ ++ atomic_t active_queues; ++ ++ struct sbitmap_queue bitmap_tags; ++ struct sbitmap_queue breserved_tags; ++ ++ struct request **rqs; ++ struct request **static_rqs; ++ struct list_head page_list; ++ ++ /* ++ * used to clear request reference in rqs[] before freeing one ++ * request pool ++ */ ++ spinlock_t lock; ++}; ++ ++static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, ++ unsigned int tag) ++{ ++ if (tag < tags->nr_tags) { ++ prefetch(tags->rqs[tag]); ++ return tags->rqs[tag]; ++ } ++ ++ return NULL; ++} + + enum { + BLK_MQ_UNIQUE_TAG_BITS = 16, +-- +2.35.3 + diff --git a/patches.suse/block-move-menu-Partition-type-to-block-partitions-K.patch b/patches.suse/block-move-menu-Partition-type-to-block-partitions-K.patch new file mode 100644 index 0000000..17168e9 --- /dev/null +++ b/patches.suse/block-move-menu-Partition-type-to-block-partitions-K.patch @@ -0,0 +1,58 @@ +From: Masahiro Yamada +Date: Mon, 27 Sep 2021 22:59:59 +0900 +Subject: [PATCH] block: move menu "Partition type" to block/partitions/Kconfig +Git-commit: b8b98a6225c9140ff5c2b3dce99a70ffba98e6d3 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Move the menu to the relevant place. 
Signed-off-by: Masahiro Yamada +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20210927140000.866249-4-masahiroy@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/Kconfig | 4 ---- + block/partitions/Kconfig | 4 ++++ + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/block/Kconfig b/block/Kconfig +index c4d35829ea4f..c6ce41a5e5b2 100644 +--- a/block/Kconfig ++++ b/block/Kconfig +@@ -190,12 +190,8 @@ config BLK_INLINE_ENCRYPTION_FALLBACK + by falling back to the kernel crypto API when inline + encryption hardware is not present. + +-menu "Partition Types" +- + source "block/partitions/Kconfig" + +-endmenu +- + config BLOCK_COMPAT + def_bool COMPAT + +diff --git a/block/partitions/Kconfig b/block/partitions/Kconfig +index 278593b8e4e9..7aff4eb81c60 100644 +--- a/block/partitions/Kconfig ++++ b/block/partitions/Kconfig +@@ -2,6 +2,8 @@ + # + # Partition configuration + # ++menu "Partition Types" ++ + config PARTITION_ADVANCED + bool "Advanced partition selection" + help +@@ -267,3 +269,5 @@ config CMDLINE_PARTITION + help + Say Y here if you want to read the partition table from bootargs. + The format for the command line is just like mtdparts. ++ ++endmenu +-- +2.35.3 + diff --git a/patches.suse/block-move-the-SECTOR_SIZE-related-definitions-to-bl.patch b/patches.suse/block-move-the-SECTOR_SIZE-related-definitions-to-bl.patch index 22ffe38..c545e9b 100644 --- a/patches.suse/block-move-the-SECTOR_SIZE-related-definitions-to-bl.patch +++ b/patches.suse/block-move-the-SECTOR_SIZE-related-definitions-to-bl.patch @@ -54,7 +54,7 @@ index fd9771a1da09..abe721591e80 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -578,23 +578,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) - return bdev->bd_disk->queue; /* this is never NULL */ + return bdev->bd_queue; /* this is never NULL */ } -/* diff --git a/patches.suse/block-move-update-request-helpers-into-blk-mq.c.patch b/patches.suse/block-move-update-request-helpers-into-blk-mq.c.patch new file mode 100644 index 0000000..5a0cd58 --- /dev/null +++ b/patches.suse/block-move-update-request-helpers-into-blk-mq.c.patch @@ -0,0 +1,370 @@ +From: Jens Axboe +Date: Thu, 14 Oct 2021 09:17:01 -0600 +Subject: [PATCH] block: move update request helpers into blk-mq.c +Git-commit: 9be3e06fb75abcca00c955af740fabff46a13452 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +For some reason we still have them in blk-core, with the rest of the +request completion being in blk-mq. That causes an out-of-line call +for each completion. + +Move them into blk-mq.c instead, where they belong.
+ +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-core.c | 146 +---------------------------------------------- + block/blk-mq.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++ + block/blk.h | 1 + + 3 files changed, 146 insertions(+), 145 deletions(-) + +diff --git a/block/blk-core.c b/block/blk-core.c +index b4094b31c99c..20b6cc06461a 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -216,7 +216,7 @@ int blk_status_to_errno(blk_status_t status) + } + EXPORT_SYMBOL_GPL(blk_status_to_errno); + +-static void print_req_error(struct request *req, blk_status_t status) ++void blk_print_req_error(struct request *req, blk_status_t status) + { + int idx = (__force int)status; + +@@ -234,33 +234,6 @@ static void print_req_error(struct request *req, blk_status_t status) + IOPRIO_PRIO_CLASS(req->ioprio)); + } + +-static void req_bio_endio(struct request *rq, struct bio *bio, +- unsigned int nbytes, blk_status_t error) +-{ +- if (error) +- bio->bi_status = error; +- +- if (unlikely(rq->rq_flags & RQF_QUIET)) +- bio_set_flag(bio, BIO_QUIET); +- +- bio_advance(bio, nbytes); +- +- if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) { +- /* +- * Partial zone append completions cannot be supported as the +- * BIO fragments may end up not being written sequentially. +- */ +- if (bio->bi_iter.bi_size) +- bio->bi_status = BLK_STS_IOERR; +- else +- bio->bi_iter.bi_sector = rq->__sector; +- } +- +- /* don't actually finish bio if it's part of flush sequence */ +- if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) +- bio_endio(bio); +-} +- + void blk_dump_rq_flags(struct request *rq, char *msg) + { + printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg, +@@ -1311,17 +1284,6 @@ static void update_io_ticks(struct block_device *part, unsigned long now, + } + } + +-static void blk_account_io_completion(struct request *req, unsigned int bytes) +-{ +- if (req->part && blk_do_io_stat(req)) { +- const int sgrp = op_stat_group(req_op(req)); +- +- part_stat_lock(); +- part_stat_add(req->part, sectors[sgrp], bytes >> 9); +- part_stat_unlock(); +- } +-} +- + void __blk_account_io_done(struct request *req, u64 now) + { + const int sgrp = op_stat_group(req_op(req)); +@@ -1430,112 +1392,6 @@ void blk_steal_bios(struct bio_list *list, struct request *rq) + } + EXPORT_SYMBOL_GPL(blk_steal_bios); + +-/** +- * blk_update_request - Complete multiple bytes without completing the request +- * @req: the request being processed +- * @error: block status code +- * @nr_bytes: number of bytes to complete for @req +- * +- * Description: +- * Ends I/O on a number of bytes attached to @req, but doesn't complete +- * the request structure even if @req doesn't have leftover. +- * If @req has leftover, sets it up for the next range of segments. +- * +- * Passing the result of blk_rq_bytes() as @nr_bytes guarantees +- * %false return from this function. +- * +- * Note: +- * The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in this function +- * except in the consistency check at the end of this function. 
+- * +- * Return: +- * %false - this request doesn't have any more data +- * %true - this request has more data +- **/ +-bool blk_update_request(struct request *req, blk_status_t error, +- unsigned int nr_bytes) +-{ +- int total_bytes; +- +- trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes); +- +- if (!req->bio) +- return false; +- +-#ifdef CONFIG_BLK_DEV_INTEGRITY +- if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ && +- error == BLK_STS_OK) +- req->q->integrity.profile->complete_fn(req, nr_bytes); +-#endif +- +- if (unlikely(error && !blk_rq_is_passthrough(req) && +- !(req->rq_flags & RQF_QUIET))) +- print_req_error(req, error); +- +- blk_account_io_completion(req, nr_bytes); +- +- total_bytes = 0; +- while (req->bio) { +- struct bio *bio = req->bio; +- unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); +- +- if (bio_bytes == bio->bi_iter.bi_size) +- req->bio = bio->bi_next; +- +- /* Completion has already been traced */ +- bio_clear_flag(bio, BIO_TRACE_COMPLETION); +- req_bio_endio(req, bio, bio_bytes, error); +- +- total_bytes += bio_bytes; +- nr_bytes -= bio_bytes; +- +- if (!nr_bytes) +- break; +- } +- +- /* +- * completely done +- */ +- if (!req->bio) { +- /* +- * Reset counters so that the request stacking driver +- * can find how many bytes remain in the request +- * later. +- */ +- req->__data_len = 0; +- return false; +- } +- +- req->__data_len -= total_bytes; +- +- /* update sector only for requests with clear definition of sector */ +- if (!blk_rq_is_passthrough(req)) +- req->__sector += total_bytes >> 9; +- +- /* mixed attributes always follow the first bio */ +- if (req->rq_flags & RQF_MIXED_MERGE) { +- req->cmd_flags &= ~REQ_FAILFAST_MASK; +- req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK; +- } +- +- if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) { +- /* +- * If total number of sectors is less than the first segment +- * size, something has gone terribly wrong. +- */ +- if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { +- blk_dump_rq_flags(req, "request botched"); +- req->__data_len = blk_rq_cur_bytes(req); +- } +- +- /* recalculate the number of segments */ +- req->nr_phys_segments = blk_recalc_rq_segments(req); +- } +- +- return true; +-} +-EXPORT_SYMBOL_GPL(blk_update_request); +- + #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE + /** + * rq_flush_dcache_pages - Helper function to flush all pages in a request +diff --git a/block/blk-mq.c b/block/blk-mq.c +index b58878221f17..9cff9e8eada4 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -628,6 +628,150 @@ void blk_mq_free_plug_rqs(struct blk_plug *plug) + } + } + ++static void req_bio_endio(struct request *rq, struct bio *bio, ++ unsigned int nbytes, blk_status_t error) ++{ ++ if (error) ++ bio->bi_status = error; ++ ++ if (unlikely(rq->rq_flags & RQF_QUIET)) ++ bio_set_flag(bio, BIO_QUIET); ++ ++ bio_advance(bio, nbytes); ++ ++ if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) { ++ /* ++ * Partial zone append completions cannot be supported as the ++ * BIO fragments may end up not being written sequentially. 
++ */ ++ if (bio->bi_iter.bi_size) ++ bio->bi_status = BLK_STS_IOERR; ++ else ++ bio->bi_iter.bi_sector = rq->__sector; ++ } ++ ++ /* don't actually finish bio if it's part of flush sequence */ ++ if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) ++ bio_endio(bio); ++} ++ ++static void blk_account_io_completion(struct request *req, unsigned int bytes) ++{ ++ if (req->part && blk_do_io_stat(req)) { ++ const int sgrp = op_stat_group(req_op(req)); ++ ++ part_stat_lock(); ++ part_stat_add(req->part, sectors[sgrp], bytes >> 9); ++ part_stat_unlock(); ++ } ++} ++ ++/** ++ * blk_update_request - Complete multiple bytes without completing the request ++ * @req: the request being processed ++ * @error: block status code ++ * @nr_bytes: number of bytes to complete for @req ++ * ++ * Description: ++ * Ends I/O on a number of bytes attached to @req, but doesn't complete ++ * the request structure even if @req doesn't have leftover. ++ * If @req has leftover, sets it up for the next range of segments. ++ * ++ * Passing the result of blk_rq_bytes() as @nr_bytes guarantees ++ * %false return from this function. ++ * ++ * Note: ++ * The RQF_SPECIAL_PAYLOAD flag is ignored on purpose in this function ++ * except in the consistency check at the end of this function. ++ * ++ * Return: ++ * %false - this request doesn't have any more data ++ * %true - this request has more data ++ **/ ++bool blk_update_request(struct request *req, blk_status_t error, ++ unsigned int nr_bytes) ++{ ++ int total_bytes; ++ ++ trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes); ++ ++ if (!req->bio) ++ return false; ++ ++#ifdef CONFIG_BLK_DEV_INTEGRITY ++ if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ && ++ error == BLK_STS_OK) ++ req->q->integrity.profile->complete_fn(req, nr_bytes); ++#endif ++ ++ if (unlikely(error && !blk_rq_is_passthrough(req) && ++ !(req->rq_flags & RQF_QUIET))) ++ blk_print_req_error(req, error); ++ ++ blk_account_io_completion(req, nr_bytes); ++ ++ total_bytes = 0; ++ while (req->bio) { ++ struct bio *bio = req->bio; ++ unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); ++ ++ if (bio_bytes == bio->bi_iter.bi_size) ++ req->bio = bio->bi_next; ++ ++ /* Completion has already been traced */ ++ bio_clear_flag(bio, BIO_TRACE_COMPLETION); ++ req_bio_endio(req, bio, bio_bytes, error); ++ ++ total_bytes += bio_bytes; ++ nr_bytes -= bio_bytes; ++ ++ if (!nr_bytes) ++ break; ++ } ++ ++ /* ++ * completely done ++ */ ++ if (!req->bio) { ++ /* ++ * Reset counters so that the request stacking driver ++ * can find how many bytes remain in the request ++ * later. ++ */ ++ req->__data_len = 0; ++ return false; ++ } ++ ++ req->__data_len -= total_bytes; ++ ++ /* update sector only for requests with clear definition of sector */ ++ if (!blk_rq_is_passthrough(req)) ++ req->__sector += total_bytes >> 9; ++ ++ /* mixed attributes always follow the first bio */ ++ if (req->rq_flags & RQF_MIXED_MERGE) { ++ req->cmd_flags &= ~REQ_FAILFAST_MASK; ++ req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK; ++ } ++ ++ if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) { ++ /* ++ * If total number of sectors is less than the first segment ++ * size, something has gone terribly wrong. 
++ */ ++ if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { ++ blk_dump_rq_flags(req, "request botched"); ++ req->__data_len = blk_rq_cur_bytes(req); ++ } ++ ++ /* recalculate the number of segments */ ++ req->nr_phys_segments = blk_recalc_rq_segments(req); ++ } ++ ++ return true; ++} ++EXPORT_SYMBOL_GPL(blk_update_request); ++ + inline void __blk_mq_end_request(struct request *rq, blk_status_t error) + { + if (blk_mq_need_time_stamp(rq)) { +diff --git a/block/blk.h b/block/blk.h +index 447a2defe2c8..e80350327e6d 100644 +--- a/block/blk.h ++++ b/block/blk.h +@@ -215,6 +215,7 @@ static inline void blk_integrity_del(struct gendisk *disk) + + unsigned long blk_rq_timeout(unsigned long timeout); + void blk_add_timer(struct request *req); ++void blk_print_req_error(struct request *req, blk_status_t status); + + bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs, struct request **same_queue_rq); +-- +2.35.3 + diff --git a/patches.suse/block-only-check-previous-entry-for-plug-merge-attem.patch b/patches.suse/block-only-check-previous-entry-for-plug-merge-attem.patch new file mode 100644 index 0000000..f3ed146 --- /dev/null +++ b/patches.suse/block-only-check-previous-entry-for-plug-merge-attem.patch @@ -0,0 +1,99 @@ +From: Jens Axboe +Date: Thu, 14 Oct 2021 07:24:07 -0600 +Subject: [PATCH] block: only check previous entry for plug merge attempt +Git-commit: d38a9c04c0d5637a828269dccb9703d42d40d42b +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Currently we scan the entire plug list, which is potentially very +expensive. In an IOPS bound workload, we can drive about 5.6M IOPS with +merging enabled, and profiling shows that the plug merge check is the +(by far) most expensive thing we're doing: + + Overhead Command Shared Object Symbol + + 20.89% io_uring [kernel.vmlinux] [k] blk_attempt_plug_merge + + 4.98% io_uring [kernel.vmlinux] [k] io_submit_sqes + + 4.78% io_uring [kernel.vmlinux] [k] blkdev_direct_IO + + 4.61% io_uring [kernel.vmlinux] [k] blk_mq_submit_bio + +Instead of browsing the whole list, just check the previously inserted +entry. That is enough for a naive merge check and will catch most cases, +and for devices that need full merging, the IO scheduler attached to +such devices will do that anyway. The plug merge is meant to be an +inexpensive check to avoid getting a request, but if we repeatedly +scan the list for every single insert, it is very much not a cheap +check. + +With this patch, the workload instead runs at ~7.0M IOPS, providing +a 25% improvement. Disabling merging entirely yields another 5% +improvement. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-merge.c | 36 +++++++++++++----------------------- + 1 file changed, 13 insertions(+), 23 deletions(-) + +diff --git a/block/blk-merge.c b/block/blk-merge.c +index 14ce19607cd8..9b77b4d6c2a1 100644 +--- a/block/blk-merge.c ++++ b/block/blk-merge.c +@@ -1084,8 +1084,8 @@ static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q, + * another request associated with @q is found on the plug list + * (optional, may be %NULL) + * +- * Determine whether @bio being queued on @q can be merged with a request +- * on %current's plugged list. Returns %true if merge was successful, ++ * Determine whether @bio being queued on @q can be merged with the previous ++ * request on %current's plugged list. Returns %true if merge was successful, + * otherwise %false. 
+ * + * Plugging coalesces IOs from the same issuer for the same purpose without +@@ -1102,32 +1102,22 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, + { + struct blk_plug *plug; + struct request *rq; +- struct list_head *plug_list; + + plug = blk_mq_plug(q, bio); +- if (!plug) ++ if (!plug || list_empty(&plug->mq_list)) + return false; + +- plug_list = &plug->mq_list; +- +- list_for_each_entry_reverse(rq, plug_list, queuelist) { +- if (rq->q == q && same_queue_rq) { +- /* +- * Only blk-mq multiple hardware queues case checks the +- * rq in the same queue, there should be only one such +- * rq in a queue +- **/ +- *same_queue_rq = rq; +- } +- +- if (rq->q != q) +- continue; +- +- if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == +- BIO_MERGE_OK) +- return true; ++ /* check the previously added entry for a quick merge attempt */ ++ rq = list_last_entry(&plug->mq_list, struct request, queuelist); ++ if (rq->q == q && same_queue_rq) { ++ /* ++ * Only blk-mq multiple hardware queues case checks the rq in ++ * the same queue, there should be only one such rq in a queue ++ */ ++ *same_queue_rq = rq; + } +- ++ if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == BIO_MERGE_OK) ++ return true; + return false; + } + +-- +2.35.3 + diff --git a/patches.suse/block-optimise-blk_flush_plug_list.patch b/patches.suse/block-optimise-blk_flush_plug_list.patch new file mode 100644 index 0000000..985893b --- /dev/null +++ b/patches.suse/block-optimise-blk_flush_plug_list.patch @@ -0,0 +1,37 @@ +From: Pavel Begunkov +Date: Wed, 20 Oct 2021 16:41:18 +0200 +Subject: [PATCH] block: optimise blk_flush_plug_list +Git-commit: b600455d84307696b3cb7debdaf3081080748409 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Don't call flush_plug_callbacks if there are no plug callbacks. + +Signed-off-by: Pavel Begunkov +[hch: split from a larger patch] +Signed-off-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20211020144119.142582-4-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/blk-core.c b/block/blk-core.c +index 88752e51d2b6..db8b2fe0ceaf 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -1639,8 +1639,8 @@ EXPORT_SYMBOL(blk_check_plugged); + + void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) + { +- flush_plug_callbacks(plug, from_schedule); +- ++ if (!list_empty(&plug->cb_list)) ++ flush_plug_callbacks(plug, from_schedule); + if (!rq_list_empty(plug->mq_list)) + blk_mq_flush_plug_list(plug, from_schedule); + if (unlikely(!from_schedule && plug->cached_rq)) +-- +2.35.3 + diff --git a/patches.suse/block-optimise-boundary-blkdev_read_iter-s-checks.patch b/patches.suse/block-optimise-boundary-blkdev_read_iter-s-checks.patch new file mode 100644 index 0000000..f4817b7 --- /dev/null +++ b/patches.suse/block-optimise-boundary-blkdev_read_iter-s-checks.patch @@ -0,0 +1,55 @@ +From: Pavel Begunkov +Date: Wed, 20 Oct 2021 20:00:48 +0100 +Subject: [PATCH] block: optimise boundary blkdev_read_iter's checks +Git-commit: 6450fe1f668f410fe2ab69c79a52a0929a4a8296 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Combine pos and len checks and mark unlikely. Also, don't reexpand if +it's not truncated. 
Reviewed-by: Christoph Hellwig +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/fff34e613aeaae1ad12977dc4592cb1a1f5d3190.1634755800.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/fops.c | 19 +++++++++++-------- + 1 file changed, 11 insertions(+), 8 deletions(-) + +diff --git a/block/fops.c b/block/fops.c +index 21d25ee0e4bf..8f733c919ed1 100644 +--- a/block/fops.c ++++ b/block/fops.c +@@ -503,17 +503,20 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) + size_t shorted = 0; + ssize_t ret; + +- if (pos >= size) +- return 0; +- +- size -= pos; +- if (iov_iter_count(to) > size) { +- shorted = iov_iter_count(to) - size; +- iov_iter_truncate(to, size); ++ if (unlikely(pos + iov_iter_count(to) > size)) { ++ if (pos >= size) ++ return 0; ++ size -= pos; ++ if (iov_iter_count(to) > size) { ++ shorted = iov_iter_count(to) - size; ++ iov_iter_truncate(to, size); ++ } + } + + ret = generic_file_read_iter(iocb, to); +- iov_iter_reexpand(to, iov_iter_count(to) + shorted); ++ ++ if (unlikely(shorted)) ++ iov_iter_reexpand(to, iov_iter_count(to) + shorted); + return ret; + } + +-- +2.35.3 + diff --git a/patches.suse/block-optimise-req_bio_endio.patch b/patches.suse/block-optimise-req_bio_endio.patch new file mode 100644 index 0000000..bcf996f --- /dev/null +++ b/patches.suse/block-optimise-req_bio_endio.patch @@ -0,0 +1,60 @@ +From: Pavel Begunkov +Date: Tue, 19 Oct 2021 22:24:12 +0100 +Subject: [PATCH] block: optimise req_bio_endio() +Git-commit: 478eb72b815f33734723867ff236d96afa418d69 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +First, get rid of an extra branch and chain the error checks. Also reshuffle +it with bio_advance() so it goes closer to the final check; with that, +the compiler loads rq->rq_flags only once and doesn't reload +bio->bi_iter.bi_size if bio_advance() didn't actually advance the iter. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 16 +++++++--------- + 1 file changed, 7 insertions(+), 9 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 31d9e612d236..8b05a8f9bb33 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -633,25 +633,23 @@ void blk_mq_free_plug_rqs(struct blk_plug *plug) + static void req_bio_endio(struct request *rq, struct bio *bio, + unsigned int nbytes, blk_status_t error) + { +- if (error) ++ if (unlikely(error)) { + bio->bi_status = error; +- +- if (unlikely(rq->rq_flags & RQF_QUIET)) +- bio_set_flag(bio, BIO_QUIET); +- +- bio_advance(bio, nbytes); +- +- if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) { ++ } else if (req_op(rq) == REQ_OP_ZONE_APPEND) { + /* + * Partial zone append completions cannot be supported as the + * BIO fragments may end up not being written sequentially.
+ */ +- if (bio->bi_iter.bi_size) ++ if (bio->bi_iter.bi_size == nbytes) + bio->bi_status = BLK_STS_IOERR; + else + bio->bi_iter.bi_sector = rq->__sector; + } + ++ bio_advance(bio, nbytes); ++ ++ if (unlikely(rq->rq_flags & RQF_QUIET)) ++ bio_set_flag(bio, BIO_QUIET); + /* don't actually finish bio if it's part of flush sequence */ + if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) + bio_endio(bio); +-- +2.35.3 + diff --git a/patches.suse/block-pass-in-blk_mq_tags-to-blk_mq_rq_ctx_init.patch b/patches.suse/block-pass-in-blk_mq_tags-to-blk_mq_rq_ctx_init.patch new file mode 100644 index 0000000..35e9f29 --- /dev/null +++ b/patches.suse/block-pass-in-blk_mq_tags-to-blk_mq_rq_ctx_init.patch @@ -0,0 +1,89 @@ +From: Jens Axboe +Date: Tue, 19 Oct 2021 09:32:58 -0600 +Subject: [PATCH] block: pass in blk_mq_tags to blk_mq_rq_ctx_init() +Git-commit: fe6134f66906dfa16d4877cab60106275f48eef7 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Instead of getting this from data for every invocation of request +initialization, pass it in as an argument instead. + +Signed-off-by: Jens Axboe +Link: https://lore.kernel.org/r/20211019153300.623322-3-axboe@kernel.dk +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 25 ++++++++++++++----------- + 1 file changed, 14 insertions(+), 11 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index a4d5b779a65a..e881e12a2691 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -316,12 +316,11 @@ void blk_mq_wake_waiters(struct request_queue *q) + } + + static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, +- unsigned int tag, u64 alloc_time_ns) ++ struct blk_mq_tags *tags, unsigned int tag, u64 alloc_time_ns) + { + struct blk_mq_ctx *ctx = data->ctx; + struct blk_mq_hw_ctx *hctx = data->hctx; + struct request_queue *q = data->q; +- struct blk_mq_tags *tags = blk_mq_tags_from_data(data); + struct request *rq = tags->static_rqs[tag]; + + if (!(data->rq_flags & RQF_ELV)) { +@@ -393,20 +392,22 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data, + u64 alloc_time_ns) + { + unsigned int tag, tag_offset; ++ struct blk_mq_tags *tags; + struct request *rq; +- unsigned long tags; ++ unsigned long tag_mask; + int i, nr = 0; + +- tags = blk_mq_get_tags(data, data->nr_tags, &tag_offset); +- if (unlikely(!tags)) ++ tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset); ++ if (unlikely(!tag_mask)) + return NULL; + +- for (i = 0; tags; i++) { +- if (!(tags & (1UL << i))) ++ tags = blk_mq_tags_from_data(data); ++ for (i = 0; tag_mask; i++) { ++ if (!(tag_mask & (1UL << i))) + continue; + tag = tag_offset + i; +- tags &= ~(1UL << i); +- rq = blk_mq_rq_ctx_init(data, tag, alloc_time_ns); ++ tag_mask &= ~(1UL << i); ++ rq = blk_mq_rq_ctx_init(data, tags, tag, alloc_time_ns); + rq_list_add(data->cached_rq, rq); + } + data->nr_tags -= nr; +@@ -477,7 +478,8 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data) + goto retry; + } + +- return blk_mq_rq_ctx_init(data, tag, alloc_time_ns); ++ return blk_mq_rq_ctx_init(data, blk_mq_tags_from_data(data), tag, ++ alloc_time_ns); + } + + struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, +@@ -563,7 +565,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, + tag = blk_mq_get_tag(&data); + if (tag == BLK_MQ_NO_TAG) + goto out_queue_exit; +- return blk_mq_rq_ctx_init(&data, tag, alloc_time_ns); ++ return blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag, ++ 
alloc_time_ns); + + out_queue_exit: + blk_queue_exit(q); +-- +2.35.3 + diff --git a/patches.suse/block-prefetch-request-to-be-initialized.patch b/patches.suse/block-prefetch-request-to-be-initialized.patch new file mode 100644 index 0000000..8e17872 --- /dev/null +++ b/patches.suse/block-prefetch-request-to-be-initialized.patch @@ -0,0 +1,34 @@ +From: Jens Axboe +Date: Tue, 19 Oct 2021 09:32:59 -0600 +Subject: [PATCH] block: prefetch request to be initialized +Git-commit: 92aff191cc5b15a56d10a7a1a0b4bc5f6e17fcf3 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Now that we have the tags available in __blk_mq_alloc_requests_batch(), we +can start fetching the first request cacheline before calling into the +request initialization. + +Signed-off-by: Jens Axboe +Link: https://lore.kernel.org/r/20211019153300.623322-4-axboe@kernel.dk +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index e881e12a2691..7cdb10f96aa6 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -405,6 +405,7 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data, + for (i = 0; tag_mask; i++) { + if (!(tag_mask & (1UL << i))) + continue; ++ prefetch(tags->static_rqs[tag]); + tag = tag_offset + i; + tag_mask &= ~(1UL << i); + rq = blk_mq_rq_ctx_init(data, tags, tag, alloc_time_ns); +-- +2.35.3 + diff --git a/patches.suse/block-provide-helpers-for-rq_list-manipulation.patch b/patches.suse/block-provide-helpers-for-rq_list-manipulation.patch new file mode 100644 index 0000000..72f78ec --- /dev/null +++ b/patches.suse/block-provide-helpers-for-rq_list-manipulation.patch @@ -0,0 +1,107 @@ +From: Jens Axboe +Date: Wed, 13 Oct 2021 07:58:52 -0600 +Subject: [PATCH] block: provide helpers for rq_list manipulation +Git-commit: 013a7f95438144f4ab39a1017a0bff2765d2551a +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Instead of open-coding the list additions, traversal, and removal, +provide a basic set of helpers.
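+
+As an illustration, the intended usage pattern of the helpers added
+below (a sketch only; rq1 and rq2 stand in for requests obtained
+elsewhere):
+
+	struct request *rq, *head = NULL;
+
+	rq_list_add(&head, rq1);		/* head -> rq1 */
+	rq_list_add(&head, rq2);		/* head -> rq2 -> rq1 (LIFO) */
+
+	rq_list_for_each(&head, rq)		/* walk without consuming */
+		pr_debug("queued tag %d\n", rq->tag);
+
+	while ((rq = rq_list_pop(&head)))	/* pop from the front */
+		blk_mq_free_request(rq);
+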
Suggested-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 19 +++++-------------- + include/linux/blkdev.h | 29 +++++++++++++++++++++++++++++ + 2 files changed, 34 insertions(+), 14 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index bd241fd7ee49..74505b545dd3 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -404,17 +404,11 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data, + tag = tag_offset + i; + tags &= ~(1UL << i); + rq = blk_mq_rq_ctx_init(data, tag, alloc_time_ns); +- rq->rq_next = *data->cached_rq; +- *data->cached_rq = rq; ++ rq_list_add(data->cached_rq, rq); + } + data->nr_tags -= nr; + +- if (!data->cached_rq) +- return NULL; +- +- rq = *data->cached_rq; +- *data->cached_rq = rq->rq_next; +- return rq; ++ return rq_list_pop(data->cached_rq); + } + + static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data) +@@ -622,11 +616,9 @@ EXPORT_SYMBOL_GPL(blk_mq_free_request); + + void blk_mq_free_plug_rqs(struct blk_plug *plug) + { +- while (plug->cached_rq) { +- struct request *rq; ++ struct request *rq; + +- rq = plug->cached_rq; +- plug->cached_rq = rq->rq_next; ++ while ((rq = rq_list_pop(&plug->cached_rq)) != NULL) { + percpu_ref_get(&rq->q->q_usage_counter); + blk_mq_free_request(rq); + } +@@ -2418,8 +2410,7 @@ void blk_mq_submit_bio(struct bio *bio) + + plug = blk_mq_plug(q, bio); + if (plug && plug->cached_rq) { +- rq = plug->cached_rq; +- plug->cached_rq = rq->rq_next; ++ rq = rq_list_pop(&plug->cached_rq); + INIT_LIST_HEAD(&rq->queuelist); + } else { + struct blk_mq_alloc_data data = { +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index d5b21fc8f49e..b0a322172965 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -1298,4 +1298,33 @@ int fsync_bdev(struct block_device *bdev); + int freeze_bdev(struct block_device *bdev); + int thaw_bdev(struct block_device *bdev); + ++#define rq_list_add(listptr, rq) do { \ ++ (rq)->rq_next = *(listptr); \ ++ *(listptr) = rq; \ ++} while (0) ++ ++#define rq_list_pop(listptr) \ ++({ \ ++ struct request *__req = NULL; \ ++ if ((listptr) && *(listptr)) { \ ++ __req = *(listptr); \ ++ *(listptr) = __req->rq_next; \ ++ } \ ++ __req; \ ++}) ++ ++#define rq_list_peek(listptr) \ ++({ \ ++ struct request *__req = NULL; \ ++ if ((listptr) && *(listptr)) \ ++ __req = *(listptr); \ ++ __req; \ ++}) ++ ++#define rq_list_for_each(listptr, pos) \ ++ for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos)) \ ++ ++#define rq_list_next(rq) (rq)->rq_next ++#define rq_list_empty(list) ((list) == (struct request *) NULL) ++ + #endif /* _LINUX_BLKDEV_H */ +-- +2.35.3 + diff --git a/patches.suse/block-re-flow-blk_mq_rq_ctx_init.patch b/patches.suse/block-re-flow-blk_mq_rq_ctx_init.patch new file mode 100644 index 0000000..86aec53 --- /dev/null +++ b/patches.suse/block-re-flow-blk_mq_rq_ctx_init.patch @@ -0,0 +1,76 @@ +From: Jens Axboe +Date: Tue, 19 Oct 2021 09:33:00 -0600 +Subject: [PATCH] block: re-flow blk_mq_rq_ctx_init() +Git-commit: c7b84d4226adaa601e9f73574ef123d1500cf712 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Now that we have flags passed in, we can do a final re-arrange of the +flow of blk_mq_rq_ctx_init() so we're always writing the request in the +order in which it is laid out.
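+
+Schematically (an illustration, not patch text): with the stores
+reordered to follow the struct layout, a cold request is written front
+to back, so its cachelines are dirtied in order rather than revisited:
+
+	rq->q = q;			/* lowest offsets first */
+	rq->mq_ctx = ctx;
+	rq->mq_hctx = hctx;
+	rq->cmd_flags = data->cmd_flags;
+	rq->rq_flags = data->rq_flags;
+	/* ... remaining fields in layout order ... */
+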
Signed-off-by: Jens Axboe +Link: https://lore.kernel.org/r/20211019153300.623322-5-axboe@kernel.dk +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 7cdb10f96aa6..ec966e0b172d 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -323,6 +323,17 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, + struct request_queue *q = data->q; + struct request *rq = tags->static_rqs[tag]; + ++ rq->q = q; ++ rq->mq_ctx = ctx; ++ rq->mq_hctx = hctx; ++ rq->cmd_flags = data->cmd_flags; ++ ++ if (data->flags & BLK_MQ_REQ_PM) ++ data->rq_flags |= RQF_PM; ++ if (blk_queue_io_stat(q)) ++ data->rq_flags |= RQF_IO_STAT; ++ rq->rq_flags = data->rq_flags; ++ + if (!(data->rq_flags & RQF_ELV)) { + rq->tag = tag; + rq->internal_tag = BLK_MQ_NO_TAG; +@@ -330,22 +341,12 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, + rq->tag = BLK_MQ_NO_TAG; + rq->internal_tag = tag; + } +- +- if (data->flags & BLK_MQ_REQ_PM) +- data->rq_flags |= RQF_PM; +- if (blk_queue_io_stat(q)) +- data->rq_flags |= RQF_IO_STAT; +- rq->rq_flags = data->rq_flags; ++ rq->timeout = 0; + + if (blk_mq_need_time_stamp(rq)) + rq->start_time_ns = ktime_get_ns(); + else + rq->start_time_ns = 0; +- /* csd/requeue_work/fifo_time is initialized before use */ +- rq->q = q; +- rq->mq_ctx = ctx; +- rq->mq_hctx = hctx; +- rq->cmd_flags = data->cmd_flags; +@@ -357,7 +358,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, + rq->end_io = NULL; + rq->end_io_data = NULL; + +-- +2.35.3 + diff --git a/patches.suse/block-refactor-bio_iov_bvec_set.patch b/patches.suse/block-refactor-bio_iov_bvec_set.patch new file mode 100644 index 0000000..138ceff --- /dev/null +++ b/patches.suse/block-refactor-bio_iov_bvec_set.patch @@ -0,0 +1,90 @@ +From: Pavel Begunkov +Date: Sat, 23 Oct 2021 17:21:33 +0100 +Subject: [PATCH] block: refactor bio_iov_bvec_set() +Git-commit: fa5fa8ec6077a954e1aeb206fece15255b19e7b9 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Combine bio_iov_bvec_set() and bio_iov_bvec_set_append() and let the +caller do iov_iter_advance(). Also get rid of __bio_iov_bvec_set(), +which was duplicated in the final binary, and replace a weird +iov_iter_truncate() of a temporary iter copy with min(), better reflecting +the intention.
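+
+The caller-visible contract change, side by side (sketch only):
+
+	/* before: the helper consumed the iterator internally */
+	bio_iov_bvec_set(bio, iter);
+
+	/* after: the caller advances by what the bio actually took */
+	bio_iov_bvec_set(bio, iter);
+	iov_iter_advance(iter, bio->bi_iter.bi_size);
+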
+ +Signed-off-by: Pavel Begunkov +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/bcf1ac36fce769a514e19475f3623cd86a1d8b72.1635006010.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/bio.c | 37 ++++++++++++++----------------------- + 1 file changed, 14 insertions(+), 23 deletions(-) + +diff --git a/block/bio.c b/block/bio.c +index 46a87c72d2b4..ead1f8a9ff5e 100644 +--- a/block/bio.c ++++ b/block/bio.c +@@ -1046,36 +1046,27 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty) + } + EXPORT_SYMBOL_GPL(__bio_release_pages); + +-static void __bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter) ++static void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter) + { ++ size_t size = iov_iter_count(iter); ++ + WARN_ON_ONCE(bio->bi_max_vecs); + ++ if (bio_op(bio) == REQ_OP_ZONE_APPEND) { ++ struct request_queue *q = bdev_get_queue(bio->bi_bdev); ++ size_t max_sectors = queue_max_zone_append_sectors(q); ++ ++ size = min(size, max_sectors << SECTOR_SHIFT); ++ } ++ + bio->bi_vcnt = iter->nr_segs; + bio->bi_io_vec = (struct bio_vec *)iter->bvec; + bio->bi_iter.bi_bvec_done = iter->iov_offset; +- bio->bi_iter.bi_size = iter->count; ++ bio->bi_iter.bi_size = size; + bio_set_flag(bio, BIO_NO_PAGE_REF); + bio_set_flag(bio, BIO_CLONED); + } + +-static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter) +-{ +- __bio_iov_bvec_set(bio, iter); +- iov_iter_advance(iter, iter->count); +- return 0; +-} +- +-static int bio_iov_bvec_set_append(struct bio *bio, struct iov_iter *iter) +-{ +- struct request_queue *q = bdev_get_queue(bio->bi_bdev); +- struct iov_iter i = *iter; +- +- iov_iter_truncate(&i, queue_max_zone_append_sectors(q) << 9); +- __bio_iov_bvec_set(bio, &i); +- iov_iter_advance(iter, i.count); +- return 0; +-} +- + static void bio_put_pages(struct page **pages, size_t size, size_t off) + { + size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE); +@@ -1217,9 +1208,9 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) + int ret = 0; + + if (iov_iter_is_bvec(iter)) { +- if (bio_op(bio) == REQ_OP_ZONE_APPEND) +- return bio_iov_bvec_set_append(bio, iter); +- return bio_iov_bvec_set(bio, iter); ++ bio_iov_bvec_set(bio, iter); ++ iov_iter_advance(iter, bio->bi_iter.bi_size); ++ return 0; + } + + do { +-- +2.35.3 + diff --git a/patches.suse/block-remove-QUEUE_FLAG_SCSI_PASSTHROUGH.patch b/patches.suse/block-remove-QUEUE_FLAG_SCSI_PASSTHROUGH.patch new file mode 100644 index 0000000..1693c44 --- /dev/null +++ b/patches.suse/block-remove-QUEUE_FLAG_SCSI_PASSTHROUGH.patch @@ -0,0 +1,116 @@ +From: Christoph Hellwig +Date: Thu, 21 Oct 2021 08:06:07 +0200 +Subject: [PATCH] block: remove QUEUE_FLAG_SCSI_PASSTHROUGH +Git-commit: 4845012eb5b4e56cadb5f484cb55dd4fd9d1df80 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Export scsi_device_from_queue for use with pktcdvd and use that instead +of the otherwise unused QUEUE_FLAG_SCSI_PASSTHROUGH queue flag. 
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Hannes Reinecke +Link: https://lore.kernel.org/r/20211021060607.264371-8-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq-debugfs.c | 1 - + drivers/block/pktcdvd.c | 5 ++++- + drivers/scsi/scsi_lib.c | 8 ++++++++ + drivers/scsi/scsi_scan.c | 1 - + include/linux/blkdev.h | 3 --- + 5 files changed, 12 insertions(+), 6 deletions(-) + +diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c +index 68ca5d21cda7..a317f05de466 100644 +--- a/block/blk-mq-debugfs.c ++++ b/block/blk-mq-debugfs.c +@@ -124,7 +124,6 @@ static const char *const blk_queue_flag_name[] = { + QUEUE_FLAG_NAME(STATS), + QUEUE_FLAG_NAME(POLL_STATS), + QUEUE_FLAG_NAME(REGISTERED), +- QUEUE_FLAG_NAME(SCSI_PASSTHROUGH), + QUEUE_FLAG_NAME(QUIESCED), + QUEUE_FLAG_NAME(PCI_P2PDMA), + QUEUE_FLAG_NAME(ZONE_RESETALL), +diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c +index ea2262ec76d2..cacf64eedad8 100644 +--- a/drivers/block/pktcdvd.c ++++ b/drivers/block/pktcdvd.c +@@ -2536,6 +2536,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) + int i; + char b[BDEVNAME_SIZE]; + struct block_device *bdev; ++ struct scsi_device *sdev; + + if (pd->pkt_dev == dev) { + pkt_err(pd, "recursive setup not allowed\n"); +@@ -2559,10 +2560,12 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) + bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_NDELAY, NULL); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); +- if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) { ++ sdev = scsi_device_from_queue(bdev->bd_disk->queue); ++ if (!sdev) { + blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); + return -EINVAL; + } ++ put_device(&sdev->sdev_gendev); + + /* This is safe, since we have a reference from open(). */ + __module_get(THIS_MODULE); +diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c +index a0f801fc8943..9823b65d1536 100644 +--- a/drivers/scsi/scsi_lib.c ++++ b/drivers/scsi/scsi_lib.c +@@ -1967,6 +1967,14 @@ struct scsi_device *scsi_device_from_queue(struct request_queue *q) + + return sdev; + } ++/* ++ * pktcdvd should have been integrated into the SCSI layers, but for historical ++ * reasons like the old IDE driver it isn't. This export allows it to safely ++ * probe if a given device is a SCSI one and only attach to that. 
++ */ ++#ifdef CONFIG_CDROM_PKTCDVD_MODULE ++EXPORT_SYMBOL_GPL(scsi_device_from_queue); ++#endif + + /** + * scsi_block_requests - Utility function used by low-level drivers to prevent +diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c +index fe22191522a3..2808c0cb5711 100644 +--- a/drivers/scsi/scsi_scan.c ++++ b/drivers/scsi/scsi_scan.c +@@ -280,7 +280,6 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, + sdev->request_queue = q; + q->queuedata = sdev; + __scsi_init_queue(sdev->host, q); +- blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); + WARN_ON_ONCE(!blk_get_queue(q)); + + depth = sdev->host->cmd_per_lun ?: 1; +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index 0d5826066e16..1ad30f85d30e 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -357,7 +357,6 @@ struct request_queue { + #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ + #define QUEUE_FLAG_POLL_STATS 21 /* collecting stats for hybrid polling */ + #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ +-#define QUEUE_FLAG_SCSI_PASSTHROUGH 23 /* queue supports SCSI commands */ + #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ + #define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ + #define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ +@@ -391,8 +390,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); + #define blk_queue_secure_erase(q) \ + (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags)) + #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) +-#define blk_queue_scsi_passthrough(q) \ +- test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags) + #define blk_queue_pci_p2pdma(q) \ + test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) + #ifdef CONFIG_BLK_RQ_ALLOC_TIME +-- +2.35.3 + diff --git a/patches.suse/block-remove-debugfs-blk_mq_ctx-dispatched-merged-co.patch b/patches.suse/block-remove-debugfs-blk_mq_ctx-dispatched-merged-co.patch new file mode 100644 index 0000000..c4ce144 --- /dev/null +++ b/patches.suse/block-remove-debugfs-blk_mq_ctx-dispatched-merged-co.patch @@ -0,0 +1,165 @@ +From: Jens Axboe +Date: Sat, 16 Oct 2021 17:27:20 -0600 +Subject: [PATCH] block: remove debugfs blk_mq_ctx dispatched/merged/completed attributes +Git-commit: 9a14d6ce4135fa72705a926c894218a0d6988924 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +These were added as part of early days debugging for blk-mq, and they +are not really useful anymore. Rather than spend cycles updating them, +just get rid of them. + +As a bonus, this shrinks the per-cpu software queue size from 256b +to 192b. That's a whole cacheline less.
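+
+Back-of-envelope for the saving, assuming 64-byte cachelines and a
+64-bit build (sketch):
+
+	/*
+	 * Removed from struct blk_mq_ctx:
+	 *	unsigned long rq_dispatched[2];		16 bytes
+	 *	unsigned long rq_merged;		 8 bytes
+	 *	unsigned long rq_completed[2];		16 bytes, and its
+	 *		____cacheline_aligned_in_smp forced padding up to
+	 *		the next 64-byte boundary
+	 * Net effect: 256 -> 192 bytes, i.e. one cacheline per software
+	 * queue, per CPU.
+	 */
+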
+ +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq-debugfs.c | 54 ------------------------------------------ + block/blk-mq-sched.c | 5 +--- + block/blk-mq.c | 3 --- + block/blk-mq.h | 7 ------ + 4 files changed, 1 insertion(+), 68 deletions(-) + +diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c +index 409a8256d9ff..928a16af9175 100644 +--- a/block/blk-mq-debugfs.c ++++ b/block/blk-mq-debugfs.c +@@ -663,57 +663,6 @@ CTX_RQ_SEQ_OPS(default, HCTX_TYPE_DEFAULT); + CTX_RQ_SEQ_OPS(read, HCTX_TYPE_READ); + CTX_RQ_SEQ_OPS(poll, HCTX_TYPE_POLL); + +-static int ctx_dispatched_show(void *data, struct seq_file *m) +-{ +- struct blk_mq_ctx *ctx = data; +- +- seq_printf(m, "%lu %lu\n", ctx->rq_dispatched[1], ctx->rq_dispatched[0]); +- return 0; +-} +- +-static ssize_t ctx_dispatched_write(void *data, const char __user *buf, +- size_t count, loff_t *ppos) +-{ +- struct blk_mq_ctx *ctx = data; +- +- ctx->rq_dispatched[0] = ctx->rq_dispatched[1] = 0; +- return count; +-} +- +-static int ctx_merged_show(void *data, struct seq_file *m) +-{ +- struct blk_mq_ctx *ctx = data; +- +- seq_printf(m, "%lu\n", ctx->rq_merged); +- return 0; +-} +- +-static ssize_t ctx_merged_write(void *data, const char __user *buf, +- size_t count, loff_t *ppos) +-{ +- struct blk_mq_ctx *ctx = data; +- +- ctx->rq_merged = 0; +- return count; +-} +- +-static int ctx_completed_show(void *data, struct seq_file *m) +-{ +- struct blk_mq_ctx *ctx = data; +- +- seq_printf(m, "%lu %lu\n", ctx->rq_completed[1], ctx->rq_completed[0]); +- return 0; +-} +- +-static ssize_t ctx_completed_write(void *data, const char __user *buf, +- size_t count, loff_t *ppos) +-{ +- struct blk_mq_ctx *ctx = data; +- +- ctx->rq_completed[0] = ctx->rq_completed[1] = 0; +- return count; +-} +- + static int blk_mq_debugfs_show(struct seq_file *m, void *v) + { + const struct blk_mq_debugfs_attr *attr = m->private; +@@ -803,9 +752,6 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { + {"default_rq_list", 0400, .seq_ops = &ctx_default_rq_list_seq_ops}, + {"read_rq_list", 0400, .seq_ops = &ctx_read_rq_list_seq_ops}, + {"poll_rq_list", 0400, .seq_ops = &ctx_poll_rq_list_seq_ops}, +- {"dispatched", 0600, ctx_dispatched_show, ctx_dispatched_write}, +- {"merged", 0600, ctx_merged_show, ctx_merged_write}, +- {"completed", 0600, ctx_completed_show, ctx_completed_write}, + {}, + }; + +diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c +index efc1374b8831..e85b7556b096 100644 +--- a/block/blk-mq-sched.c ++++ b/block/blk-mq-sched.c +@@ -387,13 +387,10 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, + * potentially merge with. Currently includes a hand-wavy stop + * count of 8, to not spend too much time checking for merges. 
+ */ +- if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs)) { +- ctx->rq_merged++; ++ if (blk_bio_list_merge(q, &ctx->rq_lists[type], bio, nr_segs)) + ret = true; +- } + + spin_unlock(&ctx->lock); +- + return ret; + } + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 633d73580712..990d214a7658 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -359,7 +359,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, + rq->end_io = NULL; + rq->end_io_data = NULL; + +- data->ctx->rq_dispatched[op_is_sync(data->cmd_flags)]++; + blk_crypto_rq_set_defaults(rq); + INIT_LIST_HEAD(&rq->queuelist); + /* tag was already set */ +@@ -595,7 +594,6 @@ static void __blk_mq_free_request(struct request *rq) + void blk_mq_free_request(struct request *rq) + { + struct request_queue *q = rq->q; +- struct blk_mq_ctx *ctx = rq->mq_ctx; + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; + + if (rq->rq_flags & (RQF_ELVPRIV | RQF_ELV)) { +@@ -609,7 +607,6 @@ void blk_mq_free_request(struct request *rq) + } + } + +- ctx->rq_completed[rq_is_sync(rq)]++; + if (rq->rq_flags & RQF_MQ_INFLIGHT) + __blk_mq_dec_active_requests(hctx); + +diff --git a/block/blk-mq.h b/block/blk-mq.h +index 8be447995106..1b91a3fdaa01 100644 +--- a/block/blk-mq.h ++++ b/block/blk-mq.h +@@ -25,13 +25,6 @@ struct blk_mq_ctx { + unsigned short index_hw[HCTX_MAX_TYPES]; + struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES]; + +- /* incremented at dispatch time */ +- unsigned long rq_dispatched[2]; +- unsigned long rq_merged; +- +- /* incremented at completion time */ +- unsigned long ____cacheline_aligned_in_smp rq_completed[2]; +- + struct request_queue *queue; + struct blk_mq_ctxs *ctxs; + struct kobject kobj; +-- +2.35.3 + diff --git a/patches.suse/block-remove-redundant-y-from-BLK_CGROUP-dependency.patch b/patches.suse/block-remove-redundant-y-from-BLK_CGROUP-dependency.patch new file mode 100644 index 0000000..6616fc5 --- /dev/null +++ b/patches.suse/block-remove-redundant-y-from-BLK_CGROUP-dependency.patch @@ -0,0 +1,53 @@ +From: Masahiro Yamada +Date: Mon, 27 Sep 2021 22:59:57 +0900 +Subject: [PATCH] block: remove redundant =y from BLK_CGROUP dependency +Git-commit: df252bde82ac19324b26192ea5e7527fbc1b6033 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +CONFIG_BLK_CGROUP is a boolean option, that is, its value is 'y' or 'n'. +The comparison to 'y' is redundant. + +Signed-off-by: Masahiro Yamada +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20210927140000.866249-2-masahiroy@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/Kconfig | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/block/Kconfig b/block/Kconfig +index 8e28ae7718bd..1d83504749e7 100644 +--- a/block/Kconfig ++++ b/block/Kconfig +@@ -73,7 +73,7 @@ config BLK_DEV_ZONED + + config BLK_DEV_THROTTLING + bool "Block layer bio throttling support" +- depends on BLK_CGROUP=y ++ depends on BLK_CGROUP + select BLK_CGROUP_RWSTAT + help + Block layer bio throttling support. It can be used to limit +@@ -112,7 +112,7 @@ config BLK_WBT_MQ + + config BLK_CGROUP_IOLATENCY + bool "Enable support for latency based cgroup IO protection" +- depends on BLK_CGROUP=y ++ depends on BLK_CGROUP + help + Enabling this option enables the .latency interface for IO throttling. 
+ The IO controller will attempt to maintain average IO latencies below
+@@ -132,7 +132,7 @@ config BLK_CGROUP_FC_APPID
+
+ config BLK_CGROUP_IOCOST
+ bool "Enable support for cost model based cgroup IO controller"
+- depends on BLK_CGROUP=y
++ depends on BLK_CGROUP
+ select BLK_RQ_IO_DATA_LEN
+ select BLK_RQ_ALLOC_TIME
+ help
+--
+2.35.3
+
diff --git a/patches.suse/block-remove-some-blk_mq_hw_ctx-debugfs-entries.patch b/patches.suse/block-remove-some-blk_mq_hw_ctx-debugfs-entries.patch
new file mode 100644
index 0000000..e7958b4
--- /dev/null
+++ b/patches.suse/block-remove-some-blk_mq_hw_ctx-debugfs-entries.patch
@@ -0,0 +1,196 @@
+From: Jens Axboe
+Date: Mon, 18 Oct 2021 08:53:19 -0600
+Subject: [PATCH] block: remove some blk_mq_hw_ctx debugfs entries
+Git-commit: afd7de03c5268f74202c1dd4780a8532a11f4c6b
+Patch-mainline: v5.16-rc1
+References: jsc#PED-1183
+
+Just like the blk_mq_ctx counterparts, we've got a bunch of counters
+in here that are only for debugfs and are of questionable value. They
+are:
+
+- dispatched, index of how many requests were dispatched in one go
+
+- poll_{considered,invoked,success}, which track poll success rates. We're
+ confident in the iopoll implementation at this point, don't bother
+ tracking these.
+
+As a bonus, this shrinks each hardware queue from 576 bytes to 512 bytes,
+dropping a whole cacheline.
+
+Reviewed-by: Christoph Hellwig
+Signed-off-by: Jens Axboe
+Acked-by: Hannes Reinecke
+---
+ block/blk-mq-debugfs.c | 67 ------------------------------------------
+ block/blk-mq.c | 16 ----------
+ include/linux/blk-mq.h | 10 -------
+ 3 files changed, 93 deletions(-)
+
+diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
+index 928a16af9175..68ca5d21cda7 100644
+--- a/block/blk-mq-debugfs.c
++++ b/block/blk-mq-debugfs.c
+@@ -529,70 +529,6 @@ static int hctx_sched_tags_bitmap_show(void *data, struct seq_file *m)
+ return res;
+ }
+
+-static int hctx_io_poll_show(void *data, struct seq_file *m)
+-{
+- struct blk_mq_hw_ctx *hctx = data;
+-
+- seq_printf(m, "considered=%lu\n", hctx->poll_considered);
+- seq_printf(m, "invoked=%lu\n", hctx->poll_invoked);
+- seq_printf(m, "success=%lu\n", hctx->poll_success);
+- return 0;
+-}
+-
+-static ssize_t hctx_io_poll_write(void *data, const char __user *buf,
+- size_t count, loff_t *ppos)
+-{
+- struct blk_mq_hw_ctx *hctx = data;
+-
+- hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0;
+- return count;
+-}
+-
+-static int hctx_dispatched_show(void *data, struct seq_file *m)
+-{
+- struct blk_mq_hw_ctx *hctx = data;
+- int i;
+-
+- seq_printf(m, "%8u\t%lu\n", 0U, hctx->dispatched[0]);
+-
+- for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER - 1; i++) {
+- unsigned int d = 1U << (i - 1);
+-
+- seq_printf(m, "%8u\t%lu\n", d, hctx->dispatched[i]);
+- }
+-
+- seq_printf(m, "%8u+\t%lu\n", 1U << (i - 1), hctx->dispatched[i]);
+- return 0;
+-}
+-
+-static ssize_t hctx_dispatched_write(void *data, const char __user *buf,
+- size_t count, loff_t *ppos)
+-{
+- struct blk_mq_hw_ctx *hctx = data;
+- int i;
+-
+- for (i = 0; i < BLK_MQ_MAX_DISPATCH_ORDER; i++)
+- hctx->dispatched[i] = 0;
+- return count;
+-}
+-
+-static int hctx_queued_show(void *data, struct seq_file *m)
+-{
+- struct blk_mq_hw_ctx *hctx = data;
+-
+- seq_printf(m, "%lu\n", hctx->queued);
+- return 0;
+-}
+-
+-static ssize_t hctx_queued_write(void *data, const char __user *buf,
+- size_t count, loff_t *ppos)
+-{
+- struct blk_mq_hw_ctx *hctx = data;
+-
+- hctx->queued = 0;
+- return count;
+-}
+-
+ static int hctx_run_show(void *data, struct 
seq_file *m) + { + struct blk_mq_hw_ctx *hctx = data; +@@ -738,9 +674,6 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = { + {"tags_bitmap", 0400, hctx_tags_bitmap_show}, + {"sched_tags", 0400, hctx_sched_tags_show}, + {"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show}, +- {"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write}, +- {"dispatched", 0600, hctx_dispatched_show, hctx_dispatched_write}, +- {"queued", 0600, hctx_queued_show, hctx_queued_write}, + {"run", 0600, hctx_run_show, hctx_run_write}, + {"active", 0400, hctx_active_show}, + {"dispatch_busy", 0400, hctx_dispatch_busy_show}, +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 990d214a7658..bd241fd7ee49 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -382,7 +382,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, + } + } + +- data->hctx->queued++; + return rq; + } + +@@ -1301,14 +1300,6 @@ struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, + return data.rq; + } + +-static inline unsigned int queued_to_index(unsigned int queued) +-{ +- if (!queued) +- return 0; +- +- return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1); +-} +- + static bool __blk_mq_get_driver_tag(struct request *rq) + { + struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; +@@ -1632,8 +1623,6 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, + if (!list_empty(&zone_list)) + list_splice_tail_init(&zone_list, list); + +- hctx->dispatched[queued_to_index(queued)]++; +- + /* If we didn't flush the entire list, we could have told the driver + * there was more coming, but that turned out to be a lie. + */ +@@ -4200,14 +4189,9 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, + long state = get_current_state(); + int ret; + +- hctx->poll_considered++; +- + do { +- hctx->poll_invoked++; +- + ret = q->mq_ops->poll(hctx); + if (ret > 0) { +- hctx->poll_success++; + __set_current_state(TASK_RUNNING); + return ret; + } +diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h +index 95c3bd3a008e..9fb8618fb957 100644 +--- a/include/linux/blk-mq.h ++++ b/include/linux/blk-mq.h +@@ -341,9 +341,6 @@ struct blk_mq_hw_ctx { + unsigned long queued; + /** @run: Number of dispatched requests. */ + unsigned long run; +-#define BLK_MQ_MAX_DISPATCH_ORDER 7 +- /** @dispatched: Number of dispatch requests by queue. */ +- unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; + + /** @numa_node: NUMA node the storage adapter has been connected to. */ + unsigned int numa_node; +@@ -363,13 +360,6 @@ struct blk_mq_hw_ctx { + /** @kobj: Kernel object for sysfs. */ + struct kobject kobj; + +- /** @poll_considered: Count times blk_mq_poll() was called. */ +- unsigned long poll_considered; +- /** @poll_invoked: Count how many requests blk_mq_poll() polled. */ +- unsigned long poll_invoked; +- /** @poll_success: Count how many polled requests were completed. */ +- unsigned long poll_success; +- + #ifdef CONFIG_BLK_DEBUG_FS + /** + * @debugfs_dir: debugfs directory for this hardware queue. 
Named
+--
+2.35.3
+
diff --git a/patches.suse/block-remove-support-for-cryptoloop-and-the-xor-tran.patch b/patches.suse/block-remove-support-for-cryptoloop-and-the-xor-tran.patch
new file mode 100644
index 0000000..f2e9032
--- /dev/null
+++ b/patches.suse/block-remove-support-for-cryptoloop-and-the-xor-tran.patch
@@ -0,0 +1,914 @@
+From: Christoph Hellwig
+Date: Tue, 19 Oct 2021 09:56:39 +0200
+Subject: [PATCH] block: remove support for cryptoloop and the xor transfer
+Git-commit: 47e9624616c80c9879feda536c48c6a3a0ed9835
+Patch-mainline: v5.16-rc1
+References: jsc#PED-1183
+
+Support for cryptoloop has been officially marked broken and deprecated
+in favor of dm-crypt (which supports the same broken algorithms if
+needed) in Linux 2.6.4 (released in March 2004), and support for it has
+been entirely removed from losetup in util-linux 2.23 (released in April
+2013). The XOR transfer has never been more than a toy to demonstrate
+the transfer in the bad old times of crypto export restrictions.
+Remove them as they have some nasty interactions with loop device life
+times due to the iteration over all loop devices in
+loop_unregister_transfer.
+
+Suggested-by: Milan Broz
+Signed-off-by: Christoph Hellwig
+Link: https://lore.kernel.org/r/20211019075639.2333969-1-hch@lst.de
+Signed-off-by: Jens Axboe
+Acked-by: Hannes Reinecke
+---
+ drivers/block/Kconfig | 23 ---
+ drivers/block/Makefile | 1 -
+ drivers/block/cryptoloop.c | 206 --------------------
+ drivers/block/loop.c | 376 +++----------------------------------
+ drivers/block/loop.h | 30 ---
+ 5 files changed, 26 insertions(+), 610 deletions(-)
+ delete mode 100644 drivers/block/cryptoloop.c
+
+diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
+index ab3e37aa1830..f7f32eeaee63 100644
+--- a/drivers/block/Kconfig
++++ b/drivers/block/Kconfig
+@@ -180,14 +180,6 @@ config BLK_DEV_LOOP
+ bits of, say, a sound file). This is also safe if the file resides
+ on a remote file server.
+
+- There are several ways of encrypting disks. Some of these require
+- kernel patches. The vanilla kernel offers the cryptoloop option
+- and a Device Mapper target (which is superior, as it supports all
+- file systems). If you want to use the cryptoloop, say Y to both
+- LOOP and CRYPTOLOOP, and make sure you have a recent (version 2.12
+- or later) version of util-linux. Additionally, be aware that
+- the cryptoloop is not safe for storing journaled filesystems.
+-
+ Note that this loop device has nothing to do with the loopback
+ device used for network connections from the machine to itself.
+
+@@ -211,21 +203,6 @@ config BLK_DEV_LOOP_MIN_COUNT
+ is used, it can be set to 0, since needed loop devices can be
+ dynamically allocated with the /dev/loop-control interface.
+
+-config BLK_DEV_CRYPTOLOOP
+- tristate "Cryptoloop Support (DEPRECATED)"
+- select CRYPTO
+- select CRYPTO_CBC
+- depends on BLK_DEV_LOOP
+- help
+- Say Y here if you want to be able to use the ciphers that are
+- provided by the CryptoAPI as loop transformation. This might be
+- used as hard disk encryption.
+-
+- WARNING: This device is not safe for journaled file systems like
+- ext3 or Reiserfs. Please use the Device Mapper crypto module
+- instead, which can be configured to be on-disk compatible with the
+- cryptoloop device. cryptoloop support will be removed in Linux 5.16.
+- + source "drivers/block/drbd/Kconfig" + + config BLK_DEV_NBD +diff --git a/drivers/block/Makefile b/drivers/block/Makefile +index bc68817ef496..11a74f17c9ad 100644 +--- a/drivers/block/Makefile ++++ b/drivers/block/Makefile +@@ -24,7 +24,6 @@ obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o + obj-$(CONFIG_SUNVDC) += sunvdc.o + + obj-$(CONFIG_BLK_DEV_NBD) += nbd.o +-obj-$(CONFIG_BLK_DEV_CRYPTOLOOP) += cryptoloop.o + obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o + + obj-$(CONFIG_BLK_DEV_SX8) += sx8.o +diff --git a/drivers/block/cryptoloop.c b/drivers/block/cryptoloop.c +deleted file mode 100644 +index f0a91faa43a8..000000000000 +--- a/drivers/block/cryptoloop.c ++++ /dev/null +@@ -1,206 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0-or-later +-/* +- Linux loop encryption enabling module +- +- Copyright (C) 2002 Herbert Valerio Riedel +- Copyright (C) 2003 Fruhwirth Clemens +- +- */ +- +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include "loop.h" +- +-MODULE_LICENSE("GPL"); +-MODULE_DESCRIPTION("loop blockdevice transferfunction adaptor / CryptoAPI"); +-MODULE_AUTHOR("Herbert Valerio Riedel "); +- +-#define LOOP_IV_SECTOR_BITS 9 +-#define LOOP_IV_SECTOR_SIZE (1 << LOOP_IV_SECTOR_BITS) +- +-static int +-cryptoloop_init(struct loop_device *lo, const struct loop_info64 *info) +-{ +- int err = -EINVAL; +- int cipher_len; +- int mode_len; +- char cms[LO_NAME_SIZE]; /* cipher-mode string */ +- char *mode; +- char *cmsp = cms; /* c-m string pointer */ +- struct crypto_sync_skcipher *tfm; +- +- /* encryption breaks for non sector aligned offsets */ +- +- if (info->lo_offset % LOOP_IV_SECTOR_SIZE) +- goto out; +- +- strncpy(cms, info->lo_crypt_name, LO_NAME_SIZE); +- cms[LO_NAME_SIZE - 1] = 0; +- +- cipher_len = strcspn(cmsp, "-"); +- +- mode = cmsp + cipher_len; +- mode_len = 0; +- if (*mode) { +- mode++; +- mode_len = strcspn(mode, "-"); +- } +- +- if (!mode_len) { +- mode = "cbc"; +- mode_len = 3; +- } +- +- if (cipher_len + mode_len + 3 > LO_NAME_SIZE) +- return -EINVAL; +- +- memmove(cms, mode, mode_len); +- cmsp = cms + mode_len; +- *cmsp++ = '('; +- memcpy(cmsp, info->lo_crypt_name, cipher_len); +- cmsp += cipher_len; +- *cmsp++ = ')'; +- *cmsp = 0; +- +- tfm = crypto_alloc_sync_skcipher(cms, 0, 0); +- if (IS_ERR(tfm)) +- return PTR_ERR(tfm); +- +- err = crypto_sync_skcipher_setkey(tfm, info->lo_encrypt_key, +- info->lo_encrypt_key_size); +- +- if (err != 0) +- goto out_free_tfm; +- +- lo->key_data = tfm; +- return 0; +- +- out_free_tfm: +- crypto_free_sync_skcipher(tfm); +- +- out: +- return err; +-} +- +- +-typedef int (*encdec_cbc_t)(struct skcipher_request *req); +- +-static int +-cryptoloop_transfer(struct loop_device *lo, int cmd, +- struct page *raw_page, unsigned raw_off, +- struct page *loop_page, unsigned loop_off, +- int size, sector_t IV) +-{ +- struct crypto_sync_skcipher *tfm = lo->key_data; +- SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm); +- struct scatterlist sg_out; +- struct scatterlist sg_in; +- +- encdec_cbc_t encdecfunc; +- struct page *in_page, *out_page; +- unsigned in_offs, out_offs; +- int err; +- +- skcipher_request_set_sync_tfm(req, tfm); +- skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, +- NULL, NULL); +- +- sg_init_table(&sg_out, 1); +- sg_init_table(&sg_in, 1); +- +- if (cmd == READ) { +- in_page = raw_page; +- in_offs = raw_off; +- out_page = loop_page; +- out_offs = loop_off; +- encdecfunc = crypto_skcipher_decrypt; +- } else { +- in_page = loop_page; +- in_offs = loop_off; +- out_page = raw_page; +- out_offs = raw_off; +- 
encdecfunc = crypto_skcipher_encrypt; +- } +- +- while (size > 0) { +- const int sz = min(size, LOOP_IV_SECTOR_SIZE); +- u32 iv[4] = { 0, }; +- iv[0] = cpu_to_le32(IV & 0xffffffff); +- +- sg_set_page(&sg_in, in_page, sz, in_offs); +- sg_set_page(&sg_out, out_page, sz, out_offs); +- +- skcipher_request_set_crypt(req, &sg_in, &sg_out, sz, iv); +- err = encdecfunc(req); +- if (err) +- goto out; +- +- IV++; +- size -= sz; +- in_offs += sz; +- out_offs += sz; +- } +- +- err = 0; +- +-out: +- skcipher_request_zero(req); +- return err; +-} +- +-static int +-cryptoloop_ioctl(struct loop_device *lo, int cmd, unsigned long arg) +-{ +- return -EINVAL; +-} +- +-static int +-cryptoloop_release(struct loop_device *lo) +-{ +- struct crypto_sync_skcipher *tfm = lo->key_data; +- if (tfm != NULL) { +- crypto_free_sync_skcipher(tfm); +- lo->key_data = NULL; +- return 0; +- } +- printk(KERN_ERR "cryptoloop_release(): tfm == NULL?\n"); +- return -EINVAL; +-} +- +-static struct loop_func_table cryptoloop_funcs = { +- .number = LO_CRYPT_CRYPTOAPI, +- .init = cryptoloop_init, +- .ioctl = cryptoloop_ioctl, +- .transfer = cryptoloop_transfer, +- .release = cryptoloop_release, +- .owner = THIS_MODULE +-}; +- +-static int __init +-init_cryptoloop(void) +-{ +- int rc = loop_register_transfer(&cryptoloop_funcs); +- +- if (rc) +- printk(KERN_ERR "cryptoloop: loop_register_transfer failed\n"); +- else +- pr_warn("the cryptoloop driver has been deprecated and will be removed in in Linux 5.16\n"); +- return rc; +-} +- +-static void __exit +-cleanup_cryptoloop(void) +-{ +- if (loop_unregister_transfer(LO_CRYPT_CRYPTOAPI)) +- printk(KERN_ERR +- "cryptoloop: loop_unregister_transfer failed\n"); +-} +- +-module_init(init_cryptoloop); +-module_exit(cleanup_cryptoloop); +diff --git a/drivers/block/loop.c b/drivers/block/loop.c +index 00ee365ed5e0..302ac8f4f8ac 100644 +--- a/drivers/block/loop.c ++++ b/drivers/block/loop.c +@@ -133,58 +133,6 @@ static void loop_global_unlock(struct loop_device *lo, bool global) + static int max_part; + static int part_shift; + +-static int transfer_xor(struct loop_device *lo, int cmd, +- struct page *raw_page, unsigned raw_off, +- struct page *loop_page, unsigned loop_off, +- int size, sector_t real_block) +-{ +- char *raw_buf = kmap_atomic(raw_page) + raw_off; +- char *loop_buf = kmap_atomic(loop_page) + loop_off; +- char *in, *out, *key; +- int i, keysize; +- +- if (cmd == READ) { +- in = raw_buf; +- out = loop_buf; +- } else { +- in = loop_buf; +- out = raw_buf; +- } +- +- key = lo->lo_encrypt_key; +- keysize = lo->lo_encrypt_key_size; +- for (i = 0; i < size; i++) +- *out++ = *in++ ^ key[(i & 511) % keysize]; +- +- kunmap_atomic(loop_buf); +- kunmap_atomic(raw_buf); +- cond_resched(); +- return 0; +-} +- +-static int xor_init(struct loop_device *lo, const struct loop_info64 *info) +-{ +- if (unlikely(info->lo_encrypt_key_size <= 0)) +- return -EINVAL; +- return 0; +-} +- +-static struct loop_func_table none_funcs = { +- .number = LO_CRYPT_NONE, +-}; +- +-static struct loop_func_table xor_funcs = { +- .number = LO_CRYPT_XOR, +- .transfer = transfer_xor, +- .init = xor_init +-}; +- +-/* xfer_funcs[0] is special - its release function is never called */ +-static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { +- &none_funcs, +- &xor_funcs +-}; +- + static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) + { + loff_t loopsize; +@@ -228,8 +176,7 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) + /* + * We support direct I/O only if lo_offset is aligned 
with the + * logical I/O size of backing device, and the logical block +- * size of loop is bigger than the backing device's and the loop +- * needn't transform transfer. ++ * size of loop is bigger than the backing device's. + * + * TODO: the above condition may be loosed in the future, and + * direct I/O may be switched runtime at that time because most +@@ -238,8 +185,7 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) + if (dio) { + if (queue_logical_block_size(lo->lo_queue) >= sb_bsize && + !(lo->lo_offset & dio_align) && +- mapping->a_ops->direct_IO && +- !lo->transfer) ++ mapping->a_ops->direct_IO) + use_dio = true; + else + use_dio = false; +@@ -299,24 +245,6 @@ static void loop_set_size(struct loop_device *lo, loff_t size) + kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); + } + +-static inline int +-lo_do_transfer(struct loop_device *lo, int cmd, +- struct page *rpage, unsigned roffs, +- struct page *lpage, unsigned loffs, +- int size, sector_t rblock) +-{ +- int ret; +- +- ret = lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); +- if (likely(!ret)) +- return 0; +- +- printk_ratelimited(KERN_ERR +- "loop: Transfer error at byte offset %llu, length %i.\n", +- (unsigned long long)rblock << 9, size); +- return ret; +-} +- + static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) + { + struct iov_iter i; +@@ -356,41 +284,6 @@ static int lo_write_simple(struct loop_device *lo, struct request *rq, + return ret; + } + +-/* +- * This is the slow, transforming version that needs to double buffer the +- * data as it cannot do the transformations in place without having direct +- * access to the destination pages of the backing file. +- */ +-static int lo_write_transfer(struct loop_device *lo, struct request *rq, +- loff_t pos) +-{ +- struct bio_vec bvec, b; +- struct req_iterator iter; +- struct page *page; +- int ret = 0; +- +- page = alloc_page(GFP_NOIO); +- if (unlikely(!page)) +- return -ENOMEM; +- +- rq_for_each_segment(bvec, rq, iter) { +- ret = lo_do_transfer(lo, WRITE, page, 0, bvec.bv_page, +- bvec.bv_offset, bvec.bv_len, pos >> 9); +- if (unlikely(ret)) +- break; +- +- b.bv_page = page; +- b.bv_offset = 0; +- b.bv_len = bvec.bv_len; +- ret = lo_write_bvec(lo->lo_backing_file, &b, &pos); +- if (ret < 0) +- break; +- } +- +- __free_page(page); +- return ret; +-} +- + static int lo_read_simple(struct loop_device *lo, struct request *rq, + loff_t pos) + { +@@ -420,64 +313,12 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq, + return 0; + } + +-static int lo_read_transfer(struct loop_device *lo, struct request *rq, +- loff_t pos) +-{ +- struct bio_vec bvec, b; +- struct req_iterator iter; +- struct iov_iter i; +- struct page *page; +- ssize_t len; +- int ret = 0; +- +- page = alloc_page(GFP_NOIO); +- if (unlikely(!page)) +- return -ENOMEM; +- +- rq_for_each_segment(bvec, rq, iter) { +- loff_t offset = pos; +- +- b.bv_page = page; +- b.bv_offset = 0; +- b.bv_len = bvec.bv_len; +- +- iov_iter_bvec(&i, READ, &b, 1, b.bv_len); +- len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); +- if (len < 0) { +- ret = len; +- goto out_free_page; +- } +- +- ret = lo_do_transfer(lo, READ, page, 0, bvec.bv_page, +- bvec.bv_offset, len, offset >> 9); +- if (ret) +- goto out_free_page; +- +- flush_dcache_page(bvec.bv_page); +- +- if (len != bvec.bv_len) { +- struct bio *bio; +- +- __rq_for_each_bio(bio, rq) +- zero_fill_bio(bio); +- break; +- } +- } +- +- ret = 0; +-out_free_page: +- __free_page(page); +- 
return ret; +-} +- + static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos, + int mode) + { + /* + * We use fallocate to manipulate the space mappings used by the image +- * a.k.a. discard/zerorange. However we do not support this if +- * encryption is enabled, because it may give an attacker useful +- * information. ++ * a.k.a. discard/zerorange. + */ + struct file *file = lo->lo_backing_file; + struct request_queue *q = lo->lo_queue; +@@ -660,16 +501,12 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) + case REQ_OP_DISCARD: + return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE); + case REQ_OP_WRITE: +- if (lo->transfer) +- return lo_write_transfer(lo, rq, pos); +- else if (cmd->use_aio) ++ if (cmd->use_aio) + return lo_rw_aio(lo, cmd, pos, WRITE); + else + return lo_write_simple(lo, rq, pos); + case REQ_OP_READ: +- if (lo->transfer) +- return lo_read_transfer(lo, rq, pos); +- else if (cmd->use_aio) ++ if (cmd->use_aio) + return lo_rw_aio(lo, cmd, pos, READ); + else + return lo_read_simple(lo, rq, pos); +@@ -934,7 +771,7 @@ static void loop_config_discard(struct loop_device *lo) + * not blkdev_issue_discard(). This maintains consistent behavior with + * file-backed loop devices: discarded regions read back as zero. + */ +- if (S_ISBLK(inode->i_mode) && !lo->lo_encrypt_key_size) { ++ if (S_ISBLK(inode->i_mode)) { + struct request_queue *backingq = bdev_get_queue(I_BDEV(inode)); + + max_discard_sectors = backingq->limits.max_write_zeroes_sectors; +@@ -943,11 +780,9 @@ static void loop_config_discard(struct loop_device *lo) + + /* + * We use punch hole to reclaim the free space used by the +- * image a.k.a. discard. However we do not support discard if +- * encryption is enabled, because it may give an attacker +- * useful information. ++ * image a.k.a. discard. 
+ */ +- } else if (!file->f_op->fallocate || lo->lo_encrypt_key_size) { ++ } else if (!file->f_op->fallocate) { + max_discard_sectors = 0; + granularity = 0; + +@@ -1084,43 +919,6 @@ static void loop_update_rotational(struct loop_device *lo) + blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); + } + +-static int +-loop_release_xfer(struct loop_device *lo) +-{ +- int err = 0; +- struct loop_func_table *xfer = lo->lo_encryption; +- +- if (xfer) { +- if (xfer->release) +- err = xfer->release(lo); +- lo->transfer = NULL; +- lo->lo_encryption = NULL; +- module_put(xfer->owner); +- } +- return err; +-} +- +-static int +-loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, +- const struct loop_info64 *i) +-{ +- int err = 0; +- +- if (xfer) { +- struct module *owner = xfer->owner; +- +- if (!try_module_get(owner)) +- return -EINVAL; +- if (xfer->init) +- err = xfer->init(lo, i); +- if (err) +- module_put(owner); +- else +- lo->lo_encryption = xfer; +- } +- return err; +-} +- + /** + * loop_set_status_from_info - configure device from loop_info + * @lo: struct loop_device to configure +@@ -1133,55 +931,27 @@ static int + loop_set_status_from_info(struct loop_device *lo, + const struct loop_info64 *info) + { +- int err; +- struct loop_func_table *xfer; +- kuid_t uid = current_uid(); +- + if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) + return -EINVAL; + +- err = loop_release_xfer(lo); +- if (err) +- return err; +- +- if (info->lo_encrypt_type) { +- unsigned int type = info->lo_encrypt_type; +- +- if (type >= MAX_LO_CRYPT) +- return -EINVAL; +- xfer = xfer_funcs[type]; +- if (xfer == NULL) +- return -EINVAL; +- } else +- xfer = NULL; +- +- err = loop_init_xfer(lo, xfer, info); +- if (err) +- return err; ++ switch (info->lo_encrypt_type) { ++ case LO_CRYPT_NONE: ++ break; ++ case LO_CRYPT_XOR: ++ pr_warn("support for the xor transformation has been removed.\n"); ++ return -EINVAL; ++ case LO_CRYPT_CRYPTOAPI: ++ pr_warn("support for cryptoloop has been removed. 
Use dm-crypt instead.\n"); ++ return -EINVAL; ++ default: ++ return -EINVAL; ++ } + + lo->lo_offset = info->lo_offset; + lo->lo_sizelimit = info->lo_sizelimit; + memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); +- memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); + lo->lo_file_name[LO_NAME_SIZE-1] = 0; +- lo->lo_crypt_name[LO_NAME_SIZE-1] = 0; +- +- if (!xfer) +- xfer = &none_funcs; +- lo->transfer = xfer->transfer; +- lo->ioctl = xfer->ioctl; +- + lo->lo_flags = info->lo_flags; +- +- lo->lo_encrypt_key_size = info->lo_encrypt_key_size; +- lo->lo_init[0] = info->lo_init[0]; +- lo->lo_init[1] = info->lo_init[1]; +- if (info->lo_encrypt_key_size) { +- memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, +- info->lo_encrypt_key_size); +- lo->lo_key_owner = uid; +- } +- + return 0; + } + +@@ -1381,16 +1151,9 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) + lo->lo_backing_file = NULL; + spin_unlock_irq(&lo->lo_lock); + +- loop_release_xfer(lo); +- lo->transfer = NULL; +- lo->ioctl = NULL; + lo->lo_device = NULL; +- lo->lo_encryption = NULL; + lo->lo_offset = 0; + lo->lo_sizelimit = 0; +- lo->lo_encrypt_key_size = 0; +- memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); +- memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); + memset(lo->lo_file_name, 0, LO_NAME_SIZE); + blk_queue_logical_block_size(lo->lo_queue, 512); + blk_queue_physical_block_size(lo->lo_queue, 512); +@@ -1498,7 +1261,6 @@ static int + loop_set_status(struct loop_device *lo, const struct loop_info64 *info) + { + int err; +- kuid_t uid = current_uid(); + int prev_lo_flags; + bool partscan = false; + bool size_changed = false; +@@ -1506,12 +1268,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) + err = mutex_lock_killable(&lo->lo_mutex); + if (err) + return err; +- if (lo->lo_encrypt_key_size && +- !uid_eq(lo->lo_key_owner, uid) && +- !capable(CAP_SYS_ADMIN)) { +- err = -EPERM; +- goto out_unlock; +- } + if (lo->lo_state != Lo_bound) { + err = -ENXIO; + goto out_unlock; +@@ -1597,14 +1353,6 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) + info->lo_sizelimit = lo->lo_sizelimit; + info->lo_flags = lo->lo_flags; + memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE); +- memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE); +- info->lo_encrypt_type = +- lo->lo_encryption ? lo->lo_encryption->number : 0; +- if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) { +- info->lo_encrypt_key_size = lo->lo_encrypt_key_size; +- memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, +- lo->lo_encrypt_key_size); +- } + + /* Drop lo_mutex while we call into the filesystem. 
*/ + path = lo->lo_backing_file->f_path; +@@ -1630,16 +1378,8 @@ loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64) + info64->lo_rdevice = info->lo_rdevice; + info64->lo_offset = info->lo_offset; + info64->lo_sizelimit = 0; +- info64->lo_encrypt_type = info->lo_encrypt_type; +- info64->lo_encrypt_key_size = info->lo_encrypt_key_size; + info64->lo_flags = info->lo_flags; +- info64->lo_init[0] = info->lo_init[0]; +- info64->lo_init[1] = info->lo_init[1]; +- if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) +- memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE); +- else +- memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE); +- memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE); ++ memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE); + } + + static int +@@ -1651,16 +1391,8 @@ loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info) + info->lo_inode = info64->lo_inode; + info->lo_rdevice = info64->lo_rdevice; + info->lo_offset = info64->lo_offset; +- info->lo_encrypt_type = info64->lo_encrypt_type; +- info->lo_encrypt_key_size = info64->lo_encrypt_key_size; + info->lo_flags = info64->lo_flags; +- info->lo_init[0] = info64->lo_init[0]; +- info->lo_init[1] = info64->lo_init[1]; +- if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) +- memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE); +- else +- memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE); +- memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); ++ memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE); + + /* error in case values were truncated */ + if (info->lo_device != info64->lo_device || +@@ -1809,7 +1541,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd, + err = loop_set_block_size(lo, arg); + break; + default: +- err = lo->ioctl ? 
lo->ioctl(lo, cmd, arg) : -EINVAL; ++ err = -EINVAL; + } + mutex_unlock(&lo->lo_mutex); + return err; +@@ -1885,7 +1617,6 @@ struct compat_loop_info { + compat_ulong_t lo_inode; /* ioctl r/o */ + compat_dev_t lo_rdevice; /* ioctl r/o */ + compat_int_t lo_offset; +- compat_int_t lo_encrypt_type; + compat_int_t lo_encrypt_key_size; /* ioctl w/o */ + compat_int_t lo_flags; /* ioctl r/o */ + char lo_name[LO_NAME_SIZE]; +@@ -1914,16 +1645,8 @@ loop_info64_from_compat(const struct compat_loop_info __user *arg, + info64->lo_rdevice = info.lo_rdevice; + info64->lo_offset = info.lo_offset; + info64->lo_sizelimit = 0; +- info64->lo_encrypt_type = info.lo_encrypt_type; +- info64->lo_encrypt_key_size = info.lo_encrypt_key_size; + info64->lo_flags = info.lo_flags; +- info64->lo_init[0] = info.lo_init[0]; +- info64->lo_init[1] = info.lo_init[1]; +- if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) +- memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE); +- else +- memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE); +- memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE); ++ memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE); + return 0; + } + +@@ -1943,24 +1666,14 @@ loop_info64_to_compat(const struct loop_info64 *info64, + info.lo_inode = info64->lo_inode; + info.lo_rdevice = info64->lo_rdevice; + info.lo_offset = info64->lo_offset; +- info.lo_encrypt_type = info64->lo_encrypt_type; +- info.lo_encrypt_key_size = info64->lo_encrypt_key_size; + info.lo_flags = info64->lo_flags; +- info.lo_init[0] = info64->lo_init[0]; +- info.lo_init[1] = info64->lo_init[1]; +- if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) +- memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE); +- else +- memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE); +- memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); ++ memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE); + + /* error in case values were truncated */ + if (info.lo_device != info64->lo_device || + info.lo_rdevice != info64->lo_rdevice || + info.lo_inode != info64->lo_inode || +- info.lo_offset != info64->lo_offset || +- info.lo_init[0] != info64->lo_init[0] || +- info.lo_init[1] != info64->lo_init[1]) ++ info.lo_offset != info64->lo_offset) + return -EOVERFLOW; + + if (copy_to_user(arg, &info, sizeof(info))) +@@ -2101,43 +1814,6 @@ MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device"); + MODULE_LICENSE("GPL"); + MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR); + +-int loop_register_transfer(struct loop_func_table *funcs) +-{ +- unsigned int n = funcs->number; +- +- if (n >= MAX_LO_CRYPT || xfer_funcs[n]) +- return -EINVAL; +- xfer_funcs[n] = funcs; +- return 0; +-} +- +-int loop_unregister_transfer(int number) +-{ +- unsigned int n = number; +- struct loop_func_table *xfer; +- +- if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) +- return -EINVAL; +- /* +- * This function is called from only cleanup_cryptoloop(). +- * Given that each loop device that has a transfer enabled holds a +- * reference to the module implementing it we should never get here +- * with a transfer that is set (unless forced module unloading is +- * requested). Thus, check module's refcount and warn if this is +- * not a clean unloading. +- */ +-#ifdef CONFIG_MODULE_UNLOAD +- if (xfer->owner && module_refcount(xfer->owner) != -1) +- pr_err("Danger! 
Unregistering an in use transfer function.\n"); +-#endif +- +- xfer_funcs[n] = NULL; +- return 0; +-} +- +-EXPORT_SYMBOL(loop_register_transfer); +-EXPORT_SYMBOL(loop_unregister_transfer); +- + static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) + { +diff --git a/drivers/block/loop.h b/drivers/block/loop.h +index 04c88dd6eabd..082d4b6bfc6a 100644 +--- a/drivers/block/loop.h ++++ b/drivers/block/loop.h +@@ -32,23 +32,10 @@ struct loop_device { + loff_t lo_offset; + loff_t lo_sizelimit; + int lo_flags; +- int (*transfer)(struct loop_device *, int cmd, +- struct page *raw_page, unsigned raw_off, +- struct page *loop_page, unsigned loop_off, +- int size, sector_t real_block); + char lo_file_name[LO_NAME_SIZE]; +- char lo_crypt_name[LO_NAME_SIZE]; +- char lo_encrypt_key[LO_KEY_SIZE]; +- int lo_encrypt_key_size; +- struct loop_func_table *lo_encryption; +- __u32 lo_init[2]; +- kuid_t lo_key_owner; /* Who set the key */ +- int (*ioctl)(struct loop_device *, int cmd, +- unsigned long arg); + + struct file * lo_backing_file; + struct block_device *lo_device; +- void *key_data; + + gfp_t old_gfp_mask; + +@@ -82,21 +69,4 @@ struct loop_cmd { + struct cgroup_subsys_state *memcg_css; + }; + +-/* Support for loadable transfer modules */ +-struct loop_func_table { +- int number; /* filter type */ +- int (*transfer)(struct loop_device *lo, int cmd, +- struct page *raw_page, unsigned raw_off, +- struct page *loop_page, unsigned loop_off, +- int size, sector_t real_block); +- int (*init)(struct loop_device *, const struct loop_info64 *); +- /* release is called from loop_unregister_transfer or clr_fd */ +- int (*release)(struct loop_device *); +- int (*ioctl)(struct loop_device *, int cmd, unsigned long arg); +- struct module *owner; +-}; +- +-int loop_register_transfer(struct loop_func_table *funcs); +-int loop_unregister_transfer(int number); +- + #endif +-- +2.35.3 + diff --git a/patches.suse/block-remove-the-initialize_rq_fn-blk_mq_ops-method.patch b/patches.suse/block-remove-the-initialize_rq_fn-blk_mq_ops-method.patch new file mode 100644 index 0000000..c5611eb --- /dev/null +++ b/patches.suse/block-remove-the-initialize_rq_fn-blk_mq_ops-method.patch @@ -0,0 +1,60 @@ +From: Christoph Hellwig +Date: Thu, 21 Oct 2021 08:06:06 +0200 +Subject: [PATCH] block: remove the initialize_rq_fn blk_mq_ops method +Git-commit: 4abafdc4360d993104c2b2f85943938a0c6ad025 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Entirely unused now. 
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Hannes Reinecke +Link: https://lore.kernel.org/r/20211021060607.264371-7-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-core.c | 9 +-------- + include/linux/blk-mq.h | 5 ----- + 2 files changed, 1 insertion(+), 13 deletions(-) + +diff --git a/block/blk-core.c b/block/blk-core.c +index fd389a16013c..5ffe05b1d17c 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -606,16 +606,9 @@ EXPORT_SYMBOL(blk_get_queue); + struct request *blk_get_request(struct request_queue *q, unsigned int op, + blk_mq_req_flags_t flags) + { +- struct request *req; +- + WARN_ON_ONCE(op & REQ_NOWAIT); + WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM)); +- +- req = blk_mq_alloc_request(q, op, flags); +- if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn) +- q->mq_ops->initialize_rq_fn(req); +- +- return req; ++ return blk_mq_alloc_request(q, op, flags); + } + EXPORT_SYMBOL(blk_get_request); + +diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h +index b4039fdf1b04..ebc45cf0450b 100644 +--- a/include/linux/blk-mq.h ++++ b/include/linux/blk-mq.h +@@ -566,11 +566,6 @@ struct blk_mq_ops { + void (*exit_request)(struct blk_mq_tag_set *set, struct request *, + unsigned int); + +- /** +- * @initialize_rq_fn: Called from inside blk_get_request(). +- */ +- void (*initialize_rq_fn)(struct request *rq); +- + /** + * @cleanup_rq: Called before freeing one request which isn't completed + * yet, and usually for freeing the driver private data. +-- +2.35.3 + diff --git a/patches.suse/block-remove-useless-caller-argument-to-print_req_er.patch b/patches.suse/block-remove-useless-caller-argument-to-print_req_er.patch new file mode 100644 index 0000000..b501c8b --- /dev/null +++ b/patches.suse/block-remove-useless-caller-argument-to-print_req_er.patch @@ -0,0 +1,55 @@ +From: Jens Axboe +Date: Thu, 14 Oct 2021 09:15:40 -0600 +Subject: [PATCH] block: remove useless caller argument to print_req_error() +Git-commit: c477b7977838ac97dd9d20625591a5d23c8079b7 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We have exactly one caller of this, just get rid of adding the useless +function name to the output. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-core.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/block/blk-core.c b/block/blk-core.c +index 96ee996c0577..b4094b31c99c 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -216,8 +216,7 @@ int blk_status_to_errno(blk_status_t status) + } + EXPORT_SYMBOL_GPL(blk_status_to_errno); + +-static void print_req_error(struct request *req, blk_status_t status, +- const char *caller) ++static void print_req_error(struct request *req, blk_status_t status) + { + int idx = (__force int)status; + +@@ -225,9 +224,9 @@ static void print_req_error(struct request *req, blk_status_t status, + return; + + printk_ratelimited(KERN_ERR +- "%s: %s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x " ++ "%s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x " + "phys_seg %u prio class %u\n", +- caller, blk_errors[idx].name, ++ blk_errors[idx].name, + req->rq_disk ? 
req->rq_disk->disk_name : "?", + blk_rq_pos(req), req_op(req), blk_op_str(req_op(req)), + req->cmd_flags & ~REQ_OP_MASK, +@@ -1471,7 +1470,7 @@ bool blk_update_request(struct request *req, blk_status_t error, + + if (unlikely(error && !blk_rq_is_passthrough(req) && + !(req->rq_flags & RQF_QUIET))) +- print_req_error(req, error, __func__); ++ print_req_error(req, error); + + blk_account_io_completion(req, nr_bytes); + +-- +2.35.3 + diff --git a/patches.suse/block-rename-REQ_HIPRI-to-REQ_POLLED.patch b/patches.suse/block-rename-REQ_HIPRI-to-REQ_POLLED.patch new file mode 100644 index 0000000..a76b636 --- /dev/null +++ b/patches.suse/block-rename-REQ_HIPRI-to-REQ_POLLED.patch @@ -0,0 +1,241 @@ +From: Christoph Hellwig +Date: Tue, 12 Oct 2021 13:12:21 +0200 +Subject: [PATCH] block: rename REQ_HIPRI to REQ_POLLED +Git-commit: 6ce913fe3eee14f40f778e85999c9e599dda8c6b +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Unlike the RWF_HIPRI userspace ABI which is intentionally kept vague, +the bio flag is specific to the polling implementation, so rename and +document it properly. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Sagi Grimberg +Reviewed-by: Chaitanya Kulkarni +Tested-by: Mark Wunderlich +Link: https://lore.kernel.org/r/20211012111226.760968-12-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-core.c | 2 +- + block/blk-merge.c | 3 +-- + block/blk-mq-debugfs.c | 2 +- + block/blk-mq.c | 4 ++-- + block/blk-mq.h | 4 ++-- + block/blk.h | 4 ++-- + drivers/nvme/host/core.c | 2 +- + drivers/scsi/scsi_debug.c | 10 +++++----- + include/linux/bio.h | 2 +- + include/linux/blk_types.h | 4 ++-- + mm/page_io.c | 2 +- + 11 files changed, 19 insertions(+), 20 deletions(-) + +diff --git a/block/blk-core.c b/block/blk-core.c +index 80fa7aa394c7..8eb0e08d5395 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -842,7 +842,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) + } + + if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) +- bio_clear_hipri(bio); ++ bio_clear_polled(bio); + + switch (bio_op(bio)) { + case REQ_OP_DISCARD: +diff --git a/block/blk-merge.c b/block/blk-merge.c +index 9b77b4d6c2a1..f88d7863f997 100644 +--- a/block/blk-merge.c ++++ b/block/blk-merge.c +@@ -318,8 +318,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, + * iopoll in direct IO routine. Given performance gain of iopoll for + * big IO can be trival, disable iopoll when split needed. + */ +- bio_clear_hipri(bio); +- ++ bio_clear_polled(bio); + return bio_split(bio, sectors, GFP_NOIO, bs); + } + +diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c +index 3daea160d670..409a8256d9ff 100644 +--- a/block/blk-mq-debugfs.c ++++ b/block/blk-mq-debugfs.c +@@ -287,7 +287,7 @@ static const char *const cmd_flag_name[] = { + CMD_FLAG_NAME(BACKGROUND), + CMD_FLAG_NAME(NOWAIT), + CMD_FLAG_NAME(NOUNMAP), +- CMD_FLAG_NAME(HIPRI), ++ CMD_FLAG_NAME(POLLED), + }; + #undef CMD_FLAG_NAME + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 97c24e461d0a..a34ffcf861c3 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -732,7 +732,7 @@ bool blk_mq_complete_request_remote(struct request *rq) + * For a polled request, always complete locallly, it's pointless + * to redirect the completion. 
+ */ +- if (rq->cmd_flags & REQ_HIPRI) ++ if (rq->cmd_flags & REQ_POLLED) + return false; + + if (blk_mq_complete_need_ipi(rq)) { +@@ -2278,7 +2278,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) + + rq_qos_throttle(q, bio); + +- hipri = bio->bi_opf & REQ_HIPRI; ++ hipri = bio->bi_opf & REQ_POLLED; + + plug = blk_mq_plug(q, bio); + if (plug && plug->cached_rq) { +diff --git a/block/blk-mq.h b/block/blk-mq.h +index 5da970bb8865..a9fe01e14951 100644 +--- a/block/blk-mq.h ++++ b/block/blk-mq.h +@@ -106,9 +106,9 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, + enum hctx_type type = HCTX_TYPE_DEFAULT; + + /* +- * The caller ensure that if REQ_HIPRI, poll must be enabled. ++ * The caller ensure that if REQ_POLLED, poll must be enabled. + */ +- if (flags & REQ_HIPRI) ++ if (flags & REQ_POLLED) + type = HCTX_TYPE_POLL; + else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + type = HCTX_TYPE_READ; +diff --git a/block/blk.h b/block/blk.h +index cab8d659d8a6..fa05d3f07976 100644 +--- a/block/blk.h ++++ b/block/blk.h +@@ -416,11 +416,11 @@ extern struct device_attribute dev_attr_events; + extern struct device_attribute dev_attr_events_async; + extern struct device_attribute dev_attr_events_poll_msecs; + +-static inline void bio_clear_hipri(struct bio *bio) ++static inline void bio_clear_polled(struct bio *bio) + { + /* can't support alloc cache if we turn off polling */ + bio_clear_flag(bio, BIO_PERCPU_CACHE); +- bio->bi_opf &= ~REQ_HIPRI; ++ bio->bi_opf &= ~REQ_POLLED; + } + + long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); +diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c +index 3d444b13cd69..ae15cb714596 100644 +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -632,7 +632,7 @@ static inline void nvme_init_request(struct request *req, + + req->cmd_flags |= REQ_FAILFAST_DRIVER; + if (req->mq_hctx->type == HCTX_TYPE_POLL) +- req->cmd_flags |= REQ_HIPRI; ++ req->cmd_flags |= REQ_POLLED; + nvme_clear_nvme_request(req); + memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd)); + } +diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c +index 66f507469a31..40b473eea357 100644 +--- a/drivers/scsi/scsi_debug.c ++++ b/drivers/scsi/scsi_debug.c +@@ -5384,7 +5384,7 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, + { + bool new_sd_dp; + bool inject = false; +- bool hipri = scsi_cmd_to_rq(cmnd)->cmd_flags & REQ_HIPRI; ++ bool polled = scsi_cmd_to_rq(cmnd)->cmd_flags & REQ_POLLED; + int k, num_in_q, qdepth; + unsigned long iflags; + u64 ns_from_boot = 0; +@@ -5471,7 +5471,7 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, + if (sdebug_host_max_queue) + sd_dp->hc_idx = get_tag(cmnd); + +- if (hipri) ++ if (polled) + ns_from_boot = ktime_get_boottime_ns(); + + /* one of the resp_*() response functions is called here */ +@@ -5531,7 +5531,7 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, + kt -= d; + } + } +- if (hipri) { ++ if (polled) { + sd_dp->cmpl_ts = ktime_add(ns_to_ktime(ns_from_boot), kt); + spin_lock_irqsave(&sqp->qc_lock, iflags); + if (!sd_dp->init_poll) { +@@ -5562,7 +5562,7 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, + if (unlikely((sdebug_opts & SDEBUG_OPT_CMD_ABORT) && + atomic_read(&sdeb_inject_pending))) + sd_dp->aborted = true; +- if (hipri) { ++ if (polled) { + sd_dp->cmpl_ts = ns_to_ktime(ns_from_boot); + spin_lock_irqsave(&sqp->qc_lock, iflags); + if 
(!sd_dp->init_poll) { +@@ -7331,7 +7331,7 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num) + if (kt_from_boot < sd_dp->cmpl_ts) + continue; + +- } else /* ignoring non REQ_HIPRI requests */ ++ } else /* ignoring non REQ_POLLED requests */ + continue; + devip = (struct sdebug_dev_info *)scp->device->hostdata; + if (likely(devip)) +diff --git a/include/linux/bio.h b/include/linux/bio.h +index d8d27742a75f..c7a2d880e927 100644 +--- a/include/linux/bio.h ++++ b/include/linux/bio.h +@@ -706,7 +706,7 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page, + */ + static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) + { +- bio->bi_opf |= REQ_HIPRI; ++ bio->bi_opf |= REQ_POLLED; + if (!is_sync_kiocb(kiocb)) + bio->bi_opf |= REQ_NOWAIT; + } +diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h +index 5017ba8fc539..f8b9fce68834 100644 +--- a/include/linux/blk_types.h ++++ b/include/linux/blk_types.h +@@ -384,7 +384,7 @@ enum req_flag_bits { + /* command specific flags for REQ_OP_WRITE_ZEROES: */ + __REQ_NOUNMAP, /* do not free blocks when zeroing */ + +- __REQ_HIPRI, ++ __REQ_POLLED, /* caller polls for completion using blk_poll */ + + /* for driver use */ + __REQ_DRV, +@@ -409,7 +409,7 @@ enum req_flag_bits { + #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) + + #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) +-#define REQ_HIPRI (1ULL << __REQ_HIPRI) ++#define REQ_POLLED (1ULL << __REQ_POLLED) + + #define REQ_DRV (1ULL << __REQ_DRV) + #define REQ_SWAP (1ULL << __REQ_SWAP) +diff --git a/mm/page_io.c b/mm/page_io.c +index 5d5543fcefa4..ed2eded74f3a 100644 +--- a/mm/page_io.c ++++ b/mm/page_io.c +@@ -416,7 +416,7 @@ int swap_readpage(struct page *page, bool synchronous) + * attempt to access it in the page fault retry time check. + */ + if (synchronous) { +- bio->bi_opf |= REQ_HIPRI; ++ bio->bi_opf |= REQ_POLLED; + get_task_struct(current); + bio->bi_private = current; + } +-- +2.35.3 + diff --git a/patches.suse/block-replace-the-spin-argument-to-blk_iopoll-with-a.patch b/patches.suse/block-replace-the-spin-argument-to-blk_iopoll-with-a.patch new file mode 100644 index 0000000..af2d282 --- /dev/null +++ b/patches.suse/block-replace-the-spin-argument-to-blk_iopoll-with-a.patch @@ -0,0 +1,258 @@ +From: Christoph Hellwig +Date: Tue, 12 Oct 2021 13:12:19 +0200 +Subject: [PATCH] block: replace the spin argument to blk_iopoll with a flags + argument +Git-commit: ef99b2d37666b7a600baab9e1c4944436652b0a2 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Switch the boolean spin argument to blk_poll to passing a set of flags +instead. This will allow to control polling behavior in a more fine +grained way. 
+ +Signed-off-by: Christoph Hellwig +Tested-by: Mark Wunderlich +Link: https://lore.kernel.org/r/20211012111226.760968-10-hch@lst.de +[axboe: adapt to changed io_uring iopoll] +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-exec.c | 2 +- + block/blk-mq.c | 17 +++++++---------- + block/fops.c | 8 ++++---- + fs/io_uring.c | 9 +++++---- + fs/iomap/direct-io.c | 6 +++--- + include/linux/blkdev.h | 4 +++- + include/linux/fs.h | 2 +- + include/linux/iomap.h | 2 +- + mm/page_io.c | 2 +- + 9 files changed, 26 insertions(+), 26 deletions(-) + +diff --git a/block/blk-exec.c b/block/blk-exec.c +index d6cd501c0d34..1fa7f25e5726 100644 +--- a/block/blk-exec.c ++++ b/block/blk-exec.c +@@ -71,7 +71,7 @@ static bool blk_rq_is_poll(struct request *rq) + static void blk_rq_poll_completion(struct request *rq, struct completion *wait) + { + do { +- blk_poll(rq->q, request_to_qc_t(rq->mq_hctx, rq), true); ++ blk_poll(rq->q, request_to_qc_t(rq->mq_hctx, rq), 0); + cond_resched(); + } while (!completion_done(wait)); + } +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 7d0d947921a6..6609e10657a8 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -4052,7 +4052,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc) + } + + static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, +- bool spin) ++ unsigned int flags) + { + struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie); + long state = get_current_state(); +@@ -4075,7 +4075,7 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, + if (task_is_running(current)) + return 1; + +- if (ret < 0 || !spin) ++ if (ret < 0 || (flags & BLK_POLL_ONESHOT)) + break; + cpu_relax(); + } while (!need_resched()); +@@ -4088,15 +4088,13 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, + * blk_poll - poll for IO completions + * @q: the queue + * @cookie: cookie passed back at IO submission time +- * @spin: whether to spin for completions ++ * @flags: BLK_POLL_* flags that control the behavior + * + * Description: + * Poll for completions on the passed in queue. Returns number of +- * completed entries found. If @spin is true, then blk_poll will continue +- * looping until at least one completion is found, unless the task is +- * otherwise marked running (or we need to reschedule). ++ * completed entries found. + */ +-int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) ++int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags) + { + if (cookie == BLK_QC_T_NONE || + !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) +@@ -4105,12 +4103,11 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) + if (current->plug) + blk_flush_plug_list(current->plug, false); + +- /* If specified not to spin, we also should not sleep. 
*/ +- if (spin && q->poll_nsec != BLK_MQ_POLL_CLASSIC) { ++ if (q->poll_nsec != BLK_MQ_POLL_CLASSIC) { + if (blk_mq_poll_hybrid(q, cookie)) + return 1; + } +- return blk_mq_poll_classic(q, cookie, spin); ++ return blk_mq_poll_classic(q, cookie, flags); + } + EXPORT_SYMBOL_GPL(blk_poll); + +diff --git a/block/fops.c b/block/fops.c +index 15324f2e5a91..db8f2fe68dd2 100644 +--- a/block/fops.c ++++ b/block/fops.c +@@ -108,7 +108,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, + if (!READ_ONCE(bio.bi_private)) + break; + if (!(iocb->ki_flags & IOCB_HIPRI) || +- !blk_poll(bdev_get_queue(bdev), qc, true)) ++ !blk_poll(bdev_get_queue(bdev), qc, 0)) + blk_io_schedule(); + } + __set_current_state(TASK_RUNNING); +@@ -141,12 +141,12 @@ struct blkdev_dio { + + static struct bio_set blkdev_dio_pool; + +-static int blkdev_iopoll(struct kiocb *kiocb, bool wait) ++static int blkdev_iopoll(struct kiocb *kiocb, unsigned int flags) + { + struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host); + struct request_queue *q = bdev_get_queue(bdev); + +- return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait); ++ return blk_poll(q, READ_ONCE(kiocb->ki_cookie), flags); + } + + static void blkdev_bio_end_io(struct bio *bio) +@@ -297,7 +297,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + if (!READ_ONCE(dio->waiter)) + break; + +- if (!do_poll || !blk_poll(bdev_get_queue(bdev), qc, true)) ++ if (!do_poll || !blk_poll(bdev_get_queue(bdev), qc, 0)) + blk_io_schedule(); + } + __set_current_state(TASK_RUNNING); +diff --git a/fs/io_uring.c b/fs/io_uring.c +index d2e86788c872..541fec2bd49a 100644 +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -2457,14 +2457,15 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, + long min) + { + struct io_kiocb *req, *tmp; ++ unsigned int poll_flags = 0; + LIST_HEAD(done); +- bool spin; + + /* + * Only spin for completions if we don't have multiple devices hanging + * off our complete list, and we're under the requested amount. 
+ */ +- spin = !ctx->poll_multi_queue && *nr_events < min; ++ if (ctx->poll_multi_queue || *nr_events >= min) ++ poll_flags |= BLK_POLL_ONESHOT; + + list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) { + struct kiocb *kiocb = &req->rw.kiocb; +@@ -2482,11 +2483,11 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, + if (!list_empty(&done)) + break; + +- ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin); ++ ret = kiocb->ki_filp->f_op->iopoll(kiocb, poll_flags); + if (unlikely(ret < 0)) + return ret; + else if (ret) +- spin = false; ++ poll_flags |= BLK_POLL_ONESHOT; + + /* iopoll may have completed current req */ + if (READ_ONCE(req->iopoll_completed)) +diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c +index 560ae967f70e..236aba256cd1 100644 +--- a/fs/iomap/direct-io.c ++++ b/fs/iomap/direct-io.c +@@ -49,13 +49,13 @@ struct iomap_dio { + }; + }; + +-int iomap_dio_iopoll(struct kiocb *kiocb, bool spin) ++int iomap_dio_iopoll(struct kiocb *kiocb, unsigned int flags) + { + struct request_queue *q = READ_ONCE(kiocb->private); + + if (!q) + return 0; +- return blk_poll(q, READ_ONCE(kiocb->ki_cookie), spin); ++ return blk_poll(q, READ_ONCE(kiocb->ki_cookie), flags); + } + EXPORT_SYMBOL_GPL(iomap_dio_iopoll); + +@@ -642,7 +642,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + if (!(iocb->ki_flags & IOCB_HIPRI) || + !dio->submit.last_queue || + !blk_poll(dio->submit.last_queue, +- dio->submit.cookie, true)) ++ dio->submit.cookie, 0)) + blk_io_schedule(); + } + __set_current_state(TASK_RUNNING); +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index 17705c970d7e..e177346bc020 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -564,7 +564,9 @@ extern const char *blk_op_str(unsigned int op); + int blk_status_to_errno(blk_status_t status); + blk_status_t errno_to_blk_status(int errno); + +-int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin); ++/* only poll the hardware once, don't continue until a completion was found */ ++#define BLK_POLL_ONESHOT (1 << 0) ++int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags); + + static inline struct request_queue *bdev_get_queue(struct block_device *bdev) + { +diff --git a/include/linux/fs.h b/include/linux/fs.h +index e7a633353fd2..c443cddf414f 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -2075,7 +2075,7 @@ struct file_operations { + ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); +- int (*iopoll)(struct kiocb *kiocb, bool spin); ++ int (*iopoll)(struct kiocb *kiocb, unsigned int flags); + int (*iterate) (struct file *, struct dir_context *); + int (*iterate_shared) (struct file *, struct dir_context *); + __poll_t (*poll) (struct file *, struct poll_table_struct *); +diff --git a/include/linux/iomap.h b/include/linux/iomap.h +index 24f8489583ca..1e86b65567c2 100644 +--- a/include/linux/iomap.h ++++ b/include/linux/iomap.h +@@ -337,7 +337,7 @@ struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + const struct iomap_ops *ops, const struct iomap_dio_ops *dops, + unsigned int dio_flags); + ssize_t iomap_dio_complete(struct iomap_dio *dio); +-int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); ++int iomap_dio_iopoll(struct kiocb *kiocb, unsigned int flags); + + #ifdef CONFIG_SWAP + struct file; +diff --git a/mm/page_io.c b/mm/page_io.c +index 
c493ce9ebcf5..5d5543fcefa4 100644 +--- a/mm/page_io.c ++++ b/mm/page_io.c +@@ -428,7 +428,7 @@ int swap_readpage(struct page *page, bool synchronous) + if (!READ_ONCE(bio->bi_private)) + break; + +- if (!blk_poll(disk->queue, qc, true)) ++ if (!blk_poll(disk->queue, qc, 0)) + blk_io_schedule(); + } + __set_current_state(TASK_RUNNING); +-- +2.35.3 + diff --git a/patches.suse/block-return-whether-or-not-to-unplug-through-boolea.patch b/patches.suse/block-return-whether-or-not-to-unplug-through-boolea.patch new file mode 100644 index 0000000..eb6fc27 --- /dev/null +++ b/patches.suse/block-return-whether-or-not-to-unplug-through-boolea.patch @@ -0,0 +1,123 @@ +From: Jens Axboe +Date: Mon, 18 Oct 2021 10:07:09 -0600 +Subject: [PATCH] block: return whether or not to unplug through boolean +Git-commit: 87c037d11b83b93e9ab5eda9fb03c114f67024ff +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Instead of returning the same queue request through a request pointer, +use a boolean to accomplish the same. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-merge.c | 11 +++++------ + block/blk-mq.c | 16 +++++++++------- + block/blk.h | 2 +- + 3 files changed, 15 insertions(+), 14 deletions(-) + +diff --git a/block/blk-merge.c b/block/blk-merge.c +index ec727234ac48..c273b58378ce 100644 +--- a/block/blk-merge.c ++++ b/block/blk-merge.c +@@ -1067,9 +1067,8 @@ static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q, + * @q: request_queue new bio is being queued at + * @bio: new bio being queued + * @nr_segs: number of segments in @bio +- * @same_queue_rq: pointer to &struct request that gets filled in when +- * another request associated with @q is found on the plug list +- * (optional, may be %NULL) ++ * @same_queue_rq: output value, will be true if there's an existing request ++ * from the passed in @q already in the plug list + * + * Determine whether @bio being queued on @q can be merged with the previous + * request on %current's plugged list. Returns %true if merge was successful, +@@ -1085,7 +1084,7 @@ static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q, + * Caller must ensure !blk_queue_nomerges(q) beforehand. 
+ */ + bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, +- unsigned int nr_segs, struct request **same_queue_rq) ++ unsigned int nr_segs, bool *same_queue_rq) + { + struct blk_plug *plug; + struct request *rq; +@@ -1096,12 +1095,12 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, + + /* check the previously added entry for a quick merge attempt */ + rq = list_last_entry(&plug->mq_list, struct request, queuelist); +- if (rq->q == q && same_queue_rq) { ++ if (rq->q == q) { + /* + * Only blk-mq multiple hardware queues case checks the rq in + * the same queue, there should be only one such rq in a queue + */ +- *same_queue_rq = rq; ++ *same_queue_rq = true; + } + if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == BIO_MERGE_OK) + return true; +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 59809ec24303..335ec3a7eab7 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -2432,7 +2432,7 @@ void blk_mq_submit_bio(struct bio *bio) + const int is_flush_fua = op_is_flush(bio->bi_opf); + struct request *rq; + struct blk_plug *plug; +- struct request *same_queue_rq = NULL; ++ bool same_queue_rq = false; + unsigned int nr_segs = 1; + blk_status_t ret; + +@@ -2525,6 +2525,8 @@ void blk_mq_submit_bio(struct bio *bio) + /* Insert the request at the IO scheduler queue */ + blk_mq_sched_insert_request(rq, false, true, true); + } else if (plug && !blk_queue_nomerges(q)) { ++ struct request *next_rq = NULL; ++ + /* + * We do limited plugging. If the bio can be merged, do that. + * Otherwise the existing request in the plug list will be +@@ -2532,19 +2534,19 @@ void blk_mq_submit_bio(struct bio *bio) + * The plug list might get flushed before this. If that happens, + * the plug list is empty, and same_queue_rq is invalid. 
+ */ +- if (list_empty(&plug->mq_list)) +- same_queue_rq = NULL; + if (same_queue_rq) { +- list_del_init(&same_queue_rq->queuelist); ++ next_rq = list_last_entry(&plug->mq_list, ++ struct request, ++ queuelist); ++ list_del_init(&next_rq->queuelist); + plug->rq_count--; + } + blk_add_rq_to_plug(plug, rq); + trace_block_plug(q); + +- if (same_queue_rq) { ++ if (next_rq) { + trace_block_unplug(q, 1, true); +- blk_mq_try_issue_directly(same_queue_rq->mq_hctx, +- same_queue_rq); ++ blk_mq_try_issue_directly(next_rq->mq_hctx, next_rq); + } + } else if ((q->nr_hw_queues > 1 && is_sync) || + !rq->mq_hctx->dispatch_busy) { +diff --git a/block/blk.h b/block/blk.h +index e80350327e6d..b9729c12fd62 100644 +--- a/block/blk.h ++++ b/block/blk.h +@@ -218,7 +218,7 @@ void blk_add_timer(struct request *req); + void blk_print_req_error(struct request *req, blk_status_t status); + + bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, +- unsigned int nr_segs, struct request **same_queue_rq); ++ unsigned int nr_segs, bool *same_queue_rq); + bool blk_bio_list_merge(struct request_queue *q, struct list_head *list, + struct bio *bio, unsigned int nr_segs); + +-- +2.35.3 + diff --git a/patches.suse/block-rsxx-add-error-handling-support-for-add_disk.patch b/patches.suse/block-rsxx-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..7d94e3e --- /dev/null +++ b/patches.suse/block-rsxx-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,71 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:01:53 -0700 +Subject: [PATCH] block/rsxx: add error handling support for add_disk() +Git-commit: 54494d10031b4bc043af43251bff0d10cca6857a +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. + +Signed-off-by: Luis Chamberlain +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/rsxx/core.c | 4 +++- + drivers/block/rsxx/dev.c | 12 +++++++++--- + 2 files changed, 12 insertions(+), 4 deletions(-) + +diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c +index 83636714b8d7..8d9d69f5dfbc 100644 +--- a/drivers/block/rsxx/core.c ++++ b/drivers/block/rsxx/core.c +@@ -935,7 +935,9 @@ static int rsxx_pci_probe(struct pci_dev *dev, + card->size8 = 0; + } + +- rsxx_attach_dev(card); ++ st = rsxx_attach_dev(card); ++ if (st) ++ goto failed_create_dev; + + /************* Setup Debugfs *************/ + rsxx_debugfs_dev_new(card); +diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c +index 268252380e88..dd33f1bdf3b8 100644 +--- a/drivers/block/rsxx/dev.c ++++ b/drivers/block/rsxx/dev.c +@@ -191,6 +191,8 @@ static bool rsxx_discard_supported(struct rsxx_cardinfo *card) + + int rsxx_attach_dev(struct rsxx_cardinfo *card) + { ++ int err = 0; ++ + mutex_lock(&card->dev_lock); + + /* The block device requires the stripe size from the config. 
*/ +@@ -199,13 +201,17 @@ int rsxx_attach_dev(struct rsxx_cardinfo *card) + set_capacity(card->gendisk, card->size8 >> 9); + else + set_capacity(card->gendisk, 0); +- device_add_disk(CARD_TO_DEV(card), card->gendisk, NULL); +- card->bdev_attached = 1; ++ err = device_add_disk(CARD_TO_DEV(card), card->gendisk, NULL); ++ if (err == 0) ++ card->bdev_attached = 1; + } + + mutex_unlock(&card->dev_lock); + +- return 0; ++ if (err) ++ blk_cleanup_disk(card->gendisk); ++ ++ return err; + } + + void rsxx_detach_dev(struct rsxx_cardinfo *card) +-- +2.35.3 + diff --git a/patches.suse/block-simplify-Kconfig-files.patch b/patches.suse/block-simplify-Kconfig-files.patch new file mode 100644 index 0000000..45cafb7 --- /dev/null +++ b/patches.suse/block-simplify-Kconfig-files.patch @@ -0,0 +1,90 @@ +From: Masahiro Yamada +Date: Mon, 27 Sep 2021 22:59:58 +0900 +Subject: [PATCH] block: simplify Kconfig files +Git-commit: c50fca55d4395ae27a57dee820f6df9b6a26c295 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Everything under block/ depends on BLOCK. BLOCK_HOLDER_DEPRECATED is +selected from drivers/md/Kconfig, which is entirely dependent on BLOCK. + +Extend the 'if BLOCK' ... 'endif' so it covers the whole block/Kconfig. + +Also, clean up the definition of BLOCK_COMPAT and BLK_MQ_PCI because +COMPAT and PCI are boolean. + +Signed-off-by: Masahiro Yamada +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20210927140000.866249-3-masahiroy@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/Kconfig | 18 +++++++----------- + block/Kconfig.iosched | 4 ---- + 2 files changed, 7 insertions(+), 15 deletions(-) + +diff --git a/block/Kconfig b/block/Kconfig +index 1d83504749e7..c4d35829ea4f 100644 +--- a/block/Kconfig ++++ b/block/Kconfig +@@ -196,33 +196,29 @@ source "block/partitions/Kconfig" + + endmenu + +-endif # BLOCK +- + config BLOCK_COMPAT +- bool +- depends on BLOCK && COMPAT +- default y ++ def_bool COMPAT + + config BLK_MQ_PCI +- bool +- depends on BLOCK && PCI +- default y ++ def_bool PCI + + config BLK_MQ_VIRTIO + bool +- depends on BLOCK && VIRTIO ++ depends on VIRTIO + default y + + config BLK_MQ_RDMA + bool +- depends on BLOCK && INFINIBAND ++ depends on INFINIBAND + default y + + config BLK_PM +- def_bool BLOCK && PM ++ def_bool PM + + # do not use in new code + config BLOCK_HOLDER_DEPRECATED + bool + + source "block/Kconfig.iosched" ++ ++endif # BLOCK +diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched +index 2f2158e05a91..885fee86dfca 100644 +--- a/block/Kconfig.iosched ++++ b/block/Kconfig.iosched +@@ -1,6 +1,4 @@ + # SPDX-License-Identifier: GPL-2.0 +-if BLOCK +- + menu "IO Schedulers" + + config MQ_IOSCHED_DEADLINE +@@ -45,5 +43,3 @@ config BFQ_CGROUP_DEBUG + files in a cgroup which can be useful for debugging. + + endmenu +- +-endif +-- +2.35.3 + diff --git a/patches.suse/block-skip-elevator-fields-init-for-non-elv-queue.patch b/patches.suse/block-skip-elevator-fields-init-for-non-elv-queue.patch new file mode 100644 index 0000000..8f573b5 --- /dev/null +++ b/patches.suse/block-skip-elevator-fields-init-for-non-elv-queue.patch @@ -0,0 +1,92 @@ +From: Pavel Begunkov +Date: Mon, 18 Oct 2021 21:37:27 +0100 +Subject: [PATCH] block: skip elevator fields init for non-elv queue +Git-commit: 4f266f2be822eacd70aca2a7a53c4a111be79acb +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Don't init rq->hash and rq->rb_node in blk_mq_rq_ctx_init() if there is +no elevator. 
Also, move some other initialisers that imply barriers to
+the end, so the compiler is free to rearrange and optimise the
+rest of them.
+
+note: fold in a change from Jens leaving queue_list unconditional, as
+it might lead to problems otherwise.
+
+Signed-off-by: Pavel Begunkov
+Signed-off-by: Jens Axboe
+Acked-by: Hannes Reinecke
+---
+ block/blk-mq.c | 28 ++++++++++++++--------------
+ 1 file changed, 14 insertions(+), 14 deletions(-)
+
+diff --git a/block/blk-mq.c b/block/blk-mq.c
+index 28eb1f3c6f76..1d2e2fd4043e 100644
+--- a/block/blk-mq.c
++++ b/block/blk-mq.c
+@@ -325,6 +325,10 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
+ rq->internal_tag = BLK_MQ_NO_TAG;
+ }
+
++ if (blk_mq_need_time_stamp(rq))
++ rq->start_time_ns = ktime_get_ns();
++ else
++ rq->start_time_ns = 0;
+ /* csd/requeue_work/fifo_time is initialized before use */
+ rq->q = data->q;
+ rq->mq_ctx = data->ctx;
+@@ -334,41 +338,37 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
+ rq->rq_flags |= RQF_PM;
+ if (blk_queue_io_stat(data->q))
+ rq->rq_flags |= RQF_IO_STAT;
+- INIT_LIST_HEAD(&rq->queuelist);
+- INIT_HLIST_NODE(&rq->hash);
+- RB_CLEAR_NODE(&rq->rb_node);
+ rq->rq_disk = NULL;
+ rq->part = NULL;
+ #ifdef CONFIG_BLK_RQ_ALLOC_TIME
+ rq->alloc_time_ns = alloc_time_ns;
+ #endif
+- if (blk_mq_need_time_stamp(rq))
+- rq->start_time_ns = ktime_get_ns();
+- else
+- rq->start_time_ns = 0;
+ rq->io_start_time_ns = 0;
+ rq->stats_sectors = 0;
+ rq->nr_phys_segments = 0;
+ #if defined(CONFIG_BLK_DEV_INTEGRITY)
+ rq->nr_integrity_segments = 0;
+ #endif
+- blk_crypto_rq_set_defaults(rq);
+- /* tag was already set */
+- WRITE_ONCE(rq->deadline, 0);
+-
+ rq->timeout = 0;
+-
+ rq->end_io = NULL;
+ rq->end_io_data = NULL;
+
+ data->ctx->rq_dispatched[op_is_sync(data->cmd_flags)]++;
++ blk_crypto_rq_set_defaults(rq);
++ INIT_LIST_HEAD(&rq->queuelist);
++ /* tag was already set */
++ WRITE_ONCE(rq->deadline, 0);
+ refcount_set(&rq->ref, 1);
+
+- if (!op_is_flush(data->cmd_flags) && (rq->rq_flags & RQF_ELV)) {
++ if (rq->rq_flags & RQF_ELV) {
+ struct elevator_queue *e = data->q->elevator;
+
+ rq->elv.icq = NULL;
+- if (e->type->ops.prepare_request) {
++ INIT_HLIST_NODE(&rq->hash);
++ RB_CLEAR_NODE(&rq->rb_node);
++
++ if (!op_is_flush(data->cmd_flags) &&
++ e->type->ops.prepare_request) {
+ if (e->type->icq_cache)
+ blk_mq_sched_assign_ioc(rq);
+
+--
+2.35.3
+
diff --git a/patches.suse/block-store-elevator-state-in-request.patch b/patches.suse/block-store-elevator-state-in-request.patch
new file mode 100644
index 0000000..125fceb
--- /dev/null
+++ b/patches.suse/block-store-elevator-state-in-request.patch
@@ -0,0 +1,171 @@
+From: Jens Axboe
+Date: Fri, 15 Oct 2021 09:44:38 -0600
+Subject: [PATCH] block: store elevator state in request
+Git-commit: 2ff0682da6e09c1e0db63a2d2abcd4efb531c8db
+Patch-mainline: v5.16-rc1
+References: jsc#PED-1183
+
+Add an rq private RQF_ELV flag, which tells the block layer that this
+request was initialized on a queue that has an IO scheduler attached.
+This allows for faster checking in the fast path, rather than having to
+dereference rq->q later on.
+
+Elevator switching does full quiesce of the queue before detaching an
+IO scheduler, so it's safe to cache this in the request itself.
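+
+With the flag cached in the request, a fast path check becomes a plain
+bit test instead of a load and NULL check of rq->q->elevator; a minimal
+sketch of the resulting pattern (the real call sites are converted in
+the hunks below):
+
+        if (rq->rq_flags & RQF_ELV) {
+                struct elevator_queue *e = rq->q->elevator;
+
+                if (e->type->ops.completed_request)
+                        e->type->ops.completed_request(rq, now);
+        }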
+ +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq-sched.h | 27 ++++++++++++++++----------- + block/blk-mq.c | 20 +++++++++++--------- + include/linux/blk-mq.h | 2 ++ + 3 files changed, 29 insertions(+), 20 deletions(-) + +diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h +index fe252278ed9a..98836106b25f 100644 +--- a/block/blk-mq-sched.h ++++ b/block/blk-mq-sched.h +@@ -56,29 +56,34 @@ static inline bool + blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, + struct bio *bio) + { +- struct elevator_queue *e = q->elevator; +- +- if (e && e->type->ops.allow_merge) +- return e->type->ops.allow_merge(q, rq, bio); ++ if (rq->rq_flags & RQF_ELV) { ++ struct elevator_queue *e = q->elevator; + ++ if (e->type->ops.allow_merge) ++ return e->type->ops.allow_merge(q, rq, bio); ++ } + return true; + } + + static inline void blk_mq_sched_completed_request(struct request *rq, u64 now) + { +- struct elevator_queue *e = rq->q->elevator; ++ if (rq->rq_flags & RQF_ELV) { ++ struct elevator_queue *e = rq->q->elevator; + +- if (e && e->type->ops.completed_request) +- e->type->ops.completed_request(rq, now); ++ if (e->type->ops.completed_request) ++ e->type->ops.completed_request(rq, now); ++ } + } + + static inline void blk_mq_sched_requeue_request(struct request *rq) + { +- struct request_queue *q = rq->q; +- struct elevator_queue *e = q->elevator; ++ if (rq->rq_flags & RQF_ELV) { ++ struct request_queue *q = rq->q; ++ struct elevator_queue *e = q->elevator; + +- if ((rq->rq_flags & RQF_ELVPRIV) && e && e->type->ops.requeue_request) +- e->type->ops.requeue_request(rq); ++ if ((rq->rq_flags & RQF_ELVPRIV) && e->type->ops.requeue_request) ++ e->type->ops.requeue_request(rq); ++ } + } + + static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx) +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 9cff9e8eada4..28eb1f3c6f76 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -306,7 +306,7 @@ void blk_mq_wake_waiters(struct request_queue *q) + */ + static inline bool blk_mq_need_time_stamp(struct request *rq) + { +- return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS)) || rq->q->elevator; ++ return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV)); + } + + static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, +@@ -316,9 +316,11 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, + struct request *rq = tags->static_rqs[tag]; + + if (data->q->elevator) { ++ rq->rq_flags = RQF_ELV; + rq->tag = BLK_MQ_NO_TAG; + rq->internal_tag = tag; + } else { ++ rq->rq_flags = 0; + rq->tag = tag; + rq->internal_tag = BLK_MQ_NO_TAG; + } +@@ -327,7 +329,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, + rq->q = data->q; + rq->mq_ctx = data->ctx; + rq->mq_hctx = data->hctx; +- rq->rq_flags = 0; + rq->cmd_flags = data->cmd_flags; + if (data->flags & BLK_MQ_REQ_PM) + rq->rq_flags |= RQF_PM; +@@ -363,11 +364,11 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, + data->ctx->rq_dispatched[op_is_sync(data->cmd_flags)]++; + refcount_set(&rq->ref, 1); + +- if (!op_is_flush(data->cmd_flags)) { ++ if (!op_is_flush(data->cmd_flags) && (rq->rq_flags & RQF_ELV)) { + struct elevator_queue *e = data->q->elevator; + + rq->elv.icq = NULL; +- if (e && e->type->ops.prepare_request) { ++ if (e->type->ops.prepare_request) { + if (e->type->icq_cache) + blk_mq_sched_assign_ioc(rq); + +@@ -588,12 +589,13 @@ static void __blk_mq_free_request(struct request *rq) + void blk_mq_free_request(struct 
request *rq) + { + struct request_queue *q = rq->q; +- struct elevator_queue *e = q->elevator; + struct blk_mq_ctx *ctx = rq->mq_ctx; + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; + +- if (rq->rq_flags & RQF_ELVPRIV) { +- if (e && e->type->ops.finish_request) ++ if (rq->rq_flags & (RQF_ELVPRIV | RQF_ELV)) { ++ struct elevator_queue *e = q->elevator; ++ ++ if (e->type->ops.finish_request) + e->type->ops.finish_request(rq); + if (rq->elv.icq) { + put_io_context(rq->elv.icq->ioc); +@@ -2254,7 +2256,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, + goto insert; + } + +- if (q->elevator && !bypass_insert) ++ if ((rq->rq_flags & RQF_ELV) && !bypass_insert) + goto insert; + + budget_token = blk_mq_get_dispatch_budget(q); +@@ -2492,7 +2494,7 @@ void blk_mq_submit_bio(struct bio *bio) + } + + blk_add_rq_to_plug(plug, rq); +- } else if (q->elevator) { ++ } else if (rq->rq_flags & RQF_ELV) { + /* Insert the request at the IO scheduler queue */ + blk_mq_sched_insert_request(rq, false, true, true); + } else if (plug && !blk_queue_nomerges(q)) { +diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h +index 8ca9728cc7f2..95c3bd3a008e 100644 +--- a/include/linux/blk-mq.h ++++ b/include/linux/blk-mq.h +@@ -55,6 +55,8 @@ typedef __u32 __bitwise req_flags_t; + #define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20)) + /* ->timeout has been called, don't expire again */ + #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) ++/* queue has elevator attached */ ++#define RQF_ELV ((__force req_flags_t)(1 << 22)) + + /* flags that prevent us from merging requests: */ + #define RQF_NOMERGE_FLAGS \ +-- +2.35.3 + diff --git a/patches.suse/block-swim3-add-error-handling-support-for-add_disk.patch b/patches.suse/block-swim3-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..3b6c2bb --- /dev/null +++ b/patches.suse/block-swim3-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,37 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:02:49 -0700 +Subject: [PATCH] block/swim3: add error handling support for add_disk() +Git-commit: 2d4bcf76429713c2ca0093c11b5b75072db95a50 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. 
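+
+The conversion follows the same add_disk() error handling shape as the
+rest of this series (sketch only; it assumes the out_cleanup_disk
+unwind label that the hunk below jumps to already exists in
+swim3_attach()):
+
+        rc = add_disk(disk);
+        if (rc)
+                goto out_cleanup_disk;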
+
+Signed-off-by: Luis Chamberlain
+Link: https://lore.kernel.org/r/20210927220302.1073499-2-mcgrof@kernel.org
+Signed-off-by: Jens Axboe
+Acked-by: Hannes Reinecke
+---
+ drivers/block/swim3.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
+index 965af0a3e95b..f7e3482e846b 100644
+--- a/drivers/block/swim3.c
++++ b/drivers/block/swim3.c
+@@ -1229,7 +1229,9 @@ static int swim3_attach(struct macio_dev *mdev,
+ disk->flags |= GENHD_FL_REMOVABLE;
+ sprintf(disk->disk_name, "fd%d", floppy_count);
+ set_capacity(disk, 2880);
+- add_disk(disk);
++ rc = add_disk(disk);
++ if (rc)
++ goto out_cleanup_disk;
+
+ disks[floppy_count++] = disk;
+ return 0;
+--
+2.35.3
+
diff --git a/patches.suse/block-switch-polling-to-be-bio-based.patch b/patches.suse/block-switch-polling-to-be-bio-based.patch
new file mode 100644
index 0000000..142cbc4
--- /dev/null
+++ b/patches.suse/block-switch-polling-to-be-bio-based.patch
@@ -0,0 +1,1782 @@
+From: Christoph Hellwig
+Date: Tue, 12 Oct 2021 13:12:24 +0200
+Subject: [PATCH] block: switch polling to be bio based
+Git-commit: 3e08773c3841e9db7a520908cc2b136a77d275ff
+Patch-mainline: v5.16-rc1
+References: jsc#PED-1183
+
+Replace the blk_poll interface that requires the caller to keep a queue
+and cookie from the submissions with polling based on the bio.
+
+Polling for the bio itself leads to a few advantages:
+
+ - the cookie construction can be made entirely private in blk-mq.c
+ - the caller does not need to remember the request_queue and cookie
+   separately and thus sidesteps their lifetime issues
+ - keeping the device and the cookie inside the bio makes it trivial to
+   support polling of BIOs remapped by stacking drivers
+ - a lot of code to propagate the cookie back up the submission path can
+   be removed entirely.
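+
+For a synchronous caller the conversion boils down to polling the bio
+that was submitted instead of a (request_queue, cookie) pair; the
+resulting wait loop looks like this sketch, drawn from the fops.c
+conversion below:
+
+        submit_bio(&bio);
+        for (;;) {
+                set_current_state(TASK_UNINTERRUPTIBLE);
+                if (!READ_ONCE(bio.bi_private))
+                        break;
+                if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, 0))
+                        blk_io_schedule();
+        }
+        __set_current_state(TASK_RUNNING);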
+ +Signed-off-by: Christoph Hellwig +Tested-by: Mark Wunderlich +Link: https://lore.kernel.org/r/20211012111226.760968-15-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + arch/m68k/emu/nfblock.c | 3 +- + arch/xtensa/platforms/iss/simdisk.c | 3 +- + block/bio.c | 1 + + block/blk-core.c | 127 +++++++++++++++++++++------- + block/blk-exec.c | 10 ++- + block/blk-mq.c | 72 +++++----------- + block/blk-mq.h | 2 + + block/fops.c | 25 ++---- + drivers/block/brd.c | 12 ++- + drivers/block/drbd/drbd_int.h | 2 +- + drivers/block/drbd/drbd_req.c | 3 +- + drivers/block/n64cart.c | 12 ++- + drivers/block/null_blk/main.c | 3 +- + drivers/block/pktcdvd.c | 7 +- + drivers/block/ps3vram.c | 6 +- + drivers/block/rsxx/dev.c | 7 +- + drivers/block/zram/zram_drv.c | 10 +-- + drivers/md/bcache/request.c | 13 ++- + drivers/md/bcache/request.h | 4 +- + drivers/md/dm.c | 28 +++--- + drivers/md/md.c | 10 +-- + drivers/nvdimm/blk.c | 5 +- + drivers/nvdimm/btt.c | 5 +- + drivers/nvdimm/pmem.c | 3 +- + drivers/nvme/host/multipath.c | 6 +- + drivers/s390/block/dcssblk.c | 7 +- + fs/btrfs/inode.c | 8 +- + fs/ext4/file.c | 2 +- + fs/gfs2/file.c | 4 +- + fs/iomap/direct-io.c | 36 +++----- + fs/xfs/xfs_file.c | 2 +- + fs/zonefs/super.c | 2 +- + include/linux/bio.h | 2 +- + include/linux/blk-mq.h | 15 +--- + include/linux/blk_types.h | 12 ++- + include/linux/blkdev.h | 8 +- + include/linux/fs.h | 6 +- + include/linux/iomap.h | 5 +- + mm/page_io.c | 8 +- + 39 files changed, 232 insertions(+), 264 deletions(-) + +diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c +index 9a8394e96388..4ef457ba5220 100644 +--- a/arch/m68k/emu/nfblock.c ++++ b/arch/m68k/emu/nfblock.c +@@ -58,7 +58,7 @@ struct nfhd_device { + struct gendisk *disk; + }; + +-static blk_qc_t nfhd_submit_bio(struct bio *bio) ++static void nfhd_submit_bio(struct bio *bio) + { + struct nfhd_device *dev = bio->bi_bdev->bd_disk->private_data; + struct bio_vec bvec; +@@ -76,7 +76,6 @@ static blk_qc_t nfhd_submit_bio(struct bio *bio) + sec += len; + } + bio_endio(bio); +- return BLK_QC_T_NONE; + } + + static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo) +diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c +index 3cdfa00738e0..ddd1fe3db474 100644 +--- a/arch/xtensa/platforms/iss/simdisk.c ++++ b/arch/xtensa/platforms/iss/simdisk.c +@@ -100,7 +100,7 @@ static void simdisk_transfer(struct simdisk *dev, unsigned long sector, + spin_unlock(&dev->lock); + } + +-static blk_qc_t simdisk_submit_bio(struct bio *bio) ++static void simdisk_submit_bio(struct bio *bio) + { + struct simdisk *dev = bio->bi_bdev->bd_disk->private_data; + struct bio_vec bvec; +@@ -118,7 +118,6 @@ static blk_qc_t simdisk_submit_bio(struct bio *bio) + } + + bio_endio(bio); +- return BLK_QC_T_NONE; + } + + static int simdisk_open(struct block_device *bdev, fmode_t mode) +diff --git a/block/bio.c b/block/bio.c +index df45f4b996ac..a3c9ff23a036 100644 +--- a/block/bio.c ++++ b/block/bio.c +@@ -282,6 +282,7 @@ void bio_init(struct bio *bio, struct bio_vec *table, + + atomic_set(&bio->__bi_remaining, 1); + atomic_set(&bio->__bi_cnt, 1); ++ bio->bi_cookie = BLK_QC_T_NONE; + + bio->bi_max_vecs = max_vecs; + bio->bi_io_vec = table; +diff --git a/block/blk-core.c b/block/blk-core.c +index 8eb0e08d5395..f008c38ae967 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -915,25 +915,22 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) + return false; + } + +-static blk_qc_t __submit_bio(struct bio *bio) 
++static void __submit_bio(struct bio *bio) + { + struct gendisk *disk = bio->bi_bdev->bd_disk; +- blk_qc_t ret = BLK_QC_T_NONE; + + if (unlikely(bio_queue_enter(bio) != 0)) +- return BLK_QC_T_NONE; ++ return; + + if (!submit_bio_checks(bio) || !blk_crypto_bio_prep(&bio)) + goto queue_exit; +- if (disk->fops->submit_bio) { +- ret = disk->fops->submit_bio(bio); +- goto queue_exit; ++ if (!disk->fops->submit_bio) { ++ blk_mq_submit_bio(bio); ++ return; + } +- return blk_mq_submit_bio(bio); +- ++ disk->fops->submit_bio(bio); + queue_exit: + blk_queue_exit(disk->queue); +- return ret; + } + + /* +@@ -955,10 +952,9 @@ static blk_qc_t __submit_bio(struct bio *bio) + * bio_list_on_stack[1] contains bios that were submitted before the current + * ->submit_bio_bio, but that haven't been processed yet. + */ +-static blk_qc_t __submit_bio_noacct(struct bio *bio) ++static void __submit_bio_noacct(struct bio *bio) + { + struct bio_list bio_list_on_stack[2]; +- blk_qc_t ret = BLK_QC_T_NONE; + + BUG_ON(bio->bi_next); + +@@ -975,7 +971,7 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio) + bio_list_on_stack[1] = bio_list_on_stack[0]; + bio_list_init(&bio_list_on_stack[0]); + +- ret = __submit_bio(bio); ++ __submit_bio(bio); + + /* + * Sort new bios into those for a lower level and those for the +@@ -998,22 +994,19 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio) + } while ((bio = bio_list_pop(&bio_list_on_stack[0]))); + + current->bio_list = NULL; +- return ret; + } + +-static blk_qc_t __submit_bio_noacct_mq(struct bio *bio) ++static void __submit_bio_noacct_mq(struct bio *bio) + { + struct bio_list bio_list[2] = { }; +- blk_qc_t ret; + + current->bio_list = bio_list; + + do { +- ret = __submit_bio(bio); ++ __submit_bio(bio); + } while ((bio = bio_list_pop(&bio_list[0]))); + + current->bio_list = NULL; +- return ret; + } + + /** +@@ -1025,7 +1018,7 @@ static blk_qc_t __submit_bio_noacct_mq(struct bio *bio) + * systems and other upper level users of the block layer should use + * submit_bio() instead. + */ +-blk_qc_t submit_bio_noacct(struct bio *bio) ++void submit_bio_noacct(struct bio *bio) + { + /* + * We only want one ->submit_bio to be active at a time, else stack +@@ -1033,14 +1026,12 @@ blk_qc_t submit_bio_noacct(struct bio *bio) + * to collect a list of requests submited by a ->submit_bio method while + * it is active, and then process them after it returned. + */ +- if (current->bio_list) { ++ if (current->bio_list) + bio_list_add(¤t->bio_list[0], bio); +- return BLK_QC_T_NONE; +- } +- +- if (!bio->bi_bdev->bd_disk->fops->submit_bio) +- return __submit_bio_noacct_mq(bio); +- return __submit_bio_noacct(bio); ++ else if (!bio->bi_bdev->bd_disk->fops->submit_bio) ++ __submit_bio_noacct_mq(bio); ++ else ++ __submit_bio_noacct(bio); + } + EXPORT_SYMBOL(submit_bio_noacct); + +@@ -1057,10 +1048,10 @@ EXPORT_SYMBOL(submit_bio_noacct); + * in @bio. The bio must NOT be touched by thecaller until ->bi_end_io() has + * been called. 
+ */ +-blk_qc_t submit_bio(struct bio *bio) ++void submit_bio(struct bio *bio) + { + if (blkcg_punt_bio_submit(bio)) +- return BLK_QC_T_NONE; ++ return; + + /* + * If it's a regular read/write or a barrier with data attached, +@@ -1092,19 +1083,91 @@ blk_qc_t submit_bio(struct bio *bio) + if (unlikely(bio_op(bio) == REQ_OP_READ && + bio_flagged(bio, BIO_WORKINGSET))) { + unsigned long pflags; +- blk_qc_t ret; + + psi_memstall_enter(&pflags); +- ret = submit_bio_noacct(bio); ++ submit_bio_noacct(bio); + psi_memstall_leave(&pflags); +- +- return ret; ++ return; + } + +- return submit_bio_noacct(bio); ++ submit_bio_noacct(bio); + } + EXPORT_SYMBOL(submit_bio); + ++/** ++ * bio_poll - poll for BIO completions ++ * @bio: bio to poll for ++ * @flags: BLK_POLL_* flags that control the behavior ++ * ++ * Poll for completions on queue associated with the bio. Returns number of ++ * completed entries found. ++ * ++ * Note: the caller must either be the context that submitted @bio, or ++ * be in a RCU critical section to prevent freeing of @bio. ++ */ ++int bio_poll(struct bio *bio, unsigned int flags) ++{ ++ struct request_queue *q = bio->bi_bdev->bd_disk->queue; ++ blk_qc_t cookie = READ_ONCE(bio->bi_cookie); ++ int ret; ++ ++ if (cookie == BLK_QC_T_NONE || ++ !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) ++ return 0; ++ ++ if (current->plug) ++ blk_flush_plug_list(current->plug, false); ++ ++ if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT)) ++ return 0; ++ if (WARN_ON_ONCE(!queue_is_mq(q))) ++ ret = 0; /* not yet implemented, should not happen */ ++ else ++ ret = blk_mq_poll(q, cookie, flags); ++ blk_queue_exit(q); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(bio_poll); ++ ++/* ++ * Helper to implement file_operations.iopoll. Requires the bio to be stored ++ * in iocb->private, and cleared before freeing the bio. ++ */ ++int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags) ++{ ++ struct bio *bio; ++ int ret = 0; ++ ++ /* ++ * Note: the bio cache only uses SLAB_TYPESAFE_BY_RCU, so bio can ++ * point to a freshly allocated bio at this point. If that happens ++ * we have a few cases to consider: ++ * ++ * 1) the bio is beeing initialized and bi_bdev is NULL. We can just ++ * simply nothing in this case ++ * 2) the bio points to a not poll enabled device. bio_poll will catch ++ * this and return 0 ++ * 3) the bio points to a poll capable device, including but not ++ * limited to the one that the original bio pointed to. In this ++ * case we will call into the actual poll method and poll for I/O, ++ * even if we don't need to, but it won't cause harm either. ++ * ++ * For cases 2) and 3) above the RCU grace period ensures that bi_bdev ++ * is still allocated. Because partitions hold a reference to the whole ++ * device bdev and thus disk, the disk is also still valid. Grabbing ++ * a reference to the queue in bio_poll() ensures the hctxs and requests ++ * are still valid as well. 
++ */ ++ rcu_read_lock(); ++ bio = READ_ONCE(kiocb->private); ++ if (bio && bio->bi_bdev) ++ ret = bio_poll(bio, flags); ++ rcu_read_unlock(); ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(iocb_bio_iopoll); ++ + /** + * blk_cloned_rq_check_limits - Helper function to check a cloned request + * for the new queue limits +diff --git a/block/blk-exec.c b/block/blk-exec.c +index 1fa7f25e5726..55f0cd34b37b 100644 +--- a/block/blk-exec.c ++++ b/block/blk-exec.c +@@ -65,13 +65,19 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); + + static bool blk_rq_is_poll(struct request *rq) + { +- return rq->mq_hctx && rq->mq_hctx->type == HCTX_TYPE_POLL; ++ if (!rq->mq_hctx) ++ return false; ++ if (rq->mq_hctx->type != HCTX_TYPE_POLL) ++ return false; ++ if (WARN_ON_ONCE(!rq->bio)) ++ return false; ++ return true; + } + + static void blk_rq_poll_completion(struct request *rq, struct completion *wait) + { + do { +- blk_poll(rq->q, request_to_qc_t(rq->mq_hctx, rq), 0); ++ bio_poll(rq->bio, 0); + cond_resched(); + } while (!completion_done(wait)); + } +diff --git a/block/blk-mq.c b/block/blk-mq.c +index a34ffcf861c3..0860f622099f 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -65,6 +65,9 @@ static int blk_mq_poll_stats_bkt(const struct request *rq) + return bucket; + } + ++#define BLK_QC_T_SHIFT 16 ++#define BLK_QC_T_INTERNAL (1U << 31) ++ + static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q, + blk_qc_t qc) + { +@@ -81,6 +84,13 @@ static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx, + return blk_mq_tag_to_rq(hctx->tags, tag); + } + ++static inline blk_qc_t blk_rq_to_qc(struct request *rq) ++{ ++ return (rq->mq_hctx->queue_num << BLK_QC_T_SHIFT) | ++ (rq->tag != -1 ? ++ rq->tag : (rq->internal_tag | BLK_QC_T_INTERNAL)); ++} ++ + /* + * Check if any of the ctx, dispatch list or elevator + * have pending work in this hardware queue. +@@ -819,6 +829,8 @@ void blk_mq_start_request(struct request *rq) + if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE) + q->integrity.profile->prepare_fn(rq); + #endif ++ if (rq->bio && rq->bio->bi_opf & REQ_POLLED) ++ WRITE_ONCE(rq->bio->bi_cookie, blk_rq_to_qc(rq)); + } + EXPORT_SYMBOL(blk_mq_start_request); + +@@ -2045,19 +2057,15 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, + } + + static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, +- struct request *rq, +- blk_qc_t *cookie, bool last) ++ struct request *rq, bool last) + { + struct request_queue *q = rq->q; + struct blk_mq_queue_data bd = { + .rq = rq, + .last = last, + }; +- blk_qc_t new_cookie; + blk_status_t ret; + +- new_cookie = request_to_qc_t(hctx, rq); +- + /* + * For OK queue, we are done. For error, caller may kill it. 
+ * Any other error (busy), just add it to our list as we +@@ -2067,7 +2075,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, + switch (ret) { + case BLK_STS_OK: + blk_mq_update_dispatch_busy(hctx, false); +- *cookie = new_cookie; + break; + case BLK_STS_RESOURCE: + case BLK_STS_DEV_RESOURCE: +@@ -2076,7 +2083,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, + break; + default: + blk_mq_update_dispatch_busy(hctx, false); +- *cookie = BLK_QC_T_NONE; + break; + } + +@@ -2085,7 +2091,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, + + static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, + struct request *rq, +- blk_qc_t *cookie, + bool bypass_insert, bool last) + { + struct request_queue *q = rq->q; +@@ -2119,7 +2124,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, + goto insert; + } + +- return __blk_mq_issue_directly(hctx, rq, cookie, last); ++ return __blk_mq_issue_directly(hctx, rq, last); + insert: + if (bypass_insert) + return BLK_STS_RESOURCE; +@@ -2133,7 +2138,6 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, + * blk_mq_try_issue_directly - Try to send a request directly to device driver. + * @hctx: Pointer of the associated hardware queue. + * @rq: Pointer to request to be sent. +- * @cookie: Request queue cookie. + * + * If the device has enough resources to accept a new request now, send the + * request directly to device driver. Else, insert at hctx->dispatch queue, so +@@ -2141,7 +2145,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, + * queue have higher priority. + */ + static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, +- struct request *rq, blk_qc_t *cookie) ++ struct request *rq) + { + blk_status_t ret; + int srcu_idx; +@@ -2150,7 +2154,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, + + hctx_lock(hctx, &srcu_idx); + +- ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true); ++ ret = __blk_mq_try_issue_directly(hctx, rq, false, true); + if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) + blk_mq_request_bypass_insert(rq, false, true); + else if (ret != BLK_STS_OK) +@@ -2163,11 +2167,10 @@ blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last) + { + blk_status_t ret; + int srcu_idx; +- blk_qc_t unused_cookie; + struct blk_mq_hw_ctx *hctx = rq->mq_hctx; + + hctx_lock(hctx, &srcu_idx); +- ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last); ++ ret = __blk_mq_try_issue_directly(hctx, rq, true, last); + hctx_unlock(hctx, srcu_idx); + + return ret; +@@ -2247,10 +2250,8 @@ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) + * + * It will not queue the request if there is an error with the bio, or at the + * request creation. +- * +- * Returns: Request queue cookie. 
+ */ +-blk_qc_t blk_mq_submit_bio(struct bio *bio) ++void blk_mq_submit_bio(struct bio *bio) + { + struct request_queue *q = bio->bi_bdev->bd_disk->queue; + const int is_sync = op_is_sync(bio->bi_opf); +@@ -2259,9 +2260,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) + struct blk_plug *plug; + struct request *same_queue_rq = NULL; + unsigned int nr_segs; +- blk_qc_t cookie; + blk_status_t ret; +- bool hipri; + + blk_queue_bounce(q, &bio); + __blk_queue_split(&bio, &nr_segs); +@@ -2278,8 +2277,6 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) + + rq_qos_throttle(q, bio); + +- hipri = bio->bi_opf & REQ_POLLED; +- + plug = blk_mq_plug(q, bio); + if (plug && plug->cached_rq) { + rq = plug->cached_rq; +@@ -2310,8 +2307,6 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) + + rq_qos_track(q, rq, bio); + +- cookie = request_to_qc_t(rq->mq_hctx, rq); +- + blk_mq_bio_to_request(rq, bio, nr_segs); + + ret = blk_crypto_init_request(rq); +@@ -2319,7 +2314,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) + bio->bi_status = ret; + bio_endio(bio); + blk_mq_free_request(rq); +- return BLK_QC_T_NONE; ++ return; + } + + if (unlikely(is_flush_fua)) { +@@ -2375,7 +2370,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) + if (same_queue_rq) { + trace_block_unplug(q, 1, true); + blk_mq_try_issue_directly(same_queue_rq->mq_hctx, +- same_queue_rq, &cookie); ++ same_queue_rq); + } + } else if ((q->nr_hw_queues > 1 && is_sync) || + !rq->mq_hctx->dispatch_busy) { +@@ -2383,18 +2378,15 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) + * There is no scheduler and we can try to send directly + * to the hardware. + */ +- blk_mq_try_issue_directly(rq->mq_hctx, rq, &cookie); ++ blk_mq_try_issue_directly(rq->mq_hctx, rq); + } else { + /* Default case. */ + blk_mq_sched_insert_request(rq, false, true, true); + } + +- if (!hipri) +- return BLK_QC_T_NONE; +- return cookie; ++ return; + queue_exit: + blk_queue_exit(q); +- return BLK_QC_T_NONE; + } + + static size_t order_to_size(unsigned int order) +@@ -4084,25 +4076,8 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie, + return 0; + } + +-/** +- * blk_poll - poll for IO completions +- * @q: the queue +- * @cookie: cookie passed back at IO submission time +- * @flags: BLK_POLL_* flags that control the behavior +- * +- * Description: +- * Poll for completions on the passed in queue. Returns number of +- * completed entries found. 
+- */ +-int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags) ++int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags) + { +- if (cookie == BLK_QC_T_NONE || +- !test_bit(QUEUE_FLAG_POLL, &q->queue_flags)) +- return 0; +- +- if (current->plug) +- blk_flush_plug_list(current->plug, false); +- + if (!(flags & BLK_POLL_NOSLEEP) && + q->poll_nsec != BLK_MQ_POLL_CLASSIC) { + if (blk_mq_poll_hybrid(q, cookie)) +@@ -4110,7 +4085,6 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags) + } + return blk_mq_poll_classic(q, cookie, flags); + } +-EXPORT_SYMBOL_GPL(blk_poll); + + unsigned int blk_mq_rq_cpu(struct request *rq) + { +diff --git a/block/blk-mq.h b/block/blk-mq.h +index a9fe01e14951..8be447995106 100644 +--- a/block/blk-mq.h ++++ b/block/blk-mq.h +@@ -37,6 +37,8 @@ struct blk_mq_ctx { + struct kobject kobj; + } ____cacheline_aligned_in_smp; + ++void blk_mq_submit_bio(struct bio *bio); ++int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags); + void blk_mq_exit_queue(struct request_queue *q); + int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); + void blk_mq_wake_waiters(struct request_queue *q); +diff --git a/block/fops.c b/block/fops.c +index db8f2fe68dd2..ce1255529ba2 100644 +--- a/block/fops.c ++++ b/block/fops.c +@@ -61,7 +61,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, + bool should_dirty = false; + struct bio bio; + ssize_t ret; +- blk_qc_t qc; + + if ((pos | iov_iter_alignment(iter)) & + (bdev_logical_block_size(bdev) - 1)) +@@ -102,13 +101,12 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, + if (iocb->ki_flags & IOCB_HIPRI) + bio_set_polled(&bio, iocb); + +- qc = submit_bio(&bio); ++ submit_bio(&bio); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (!READ_ONCE(bio.bi_private)) + break; +- if (!(iocb->ki_flags & IOCB_HIPRI) || +- !blk_poll(bdev_get_queue(bdev), qc, 0)) ++ if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, 0)) + blk_io_schedule(); + } + __set_current_state(TASK_RUNNING); +@@ -141,14 +139,6 @@ struct blkdev_dio { + + static struct bio_set blkdev_dio_pool; + +-static int blkdev_iopoll(struct kiocb *kiocb, unsigned int flags) +-{ +- struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host); +- struct request_queue *q = bdev_get_queue(bdev); +- +- return blk_poll(q, READ_ONCE(kiocb->ki_cookie), flags); +-} +- + static void blkdev_bio_end_io(struct bio *bio) + { + struct blkdev_dio *dio = bio->bi_private; +@@ -162,6 +152,8 @@ static void blkdev_bio_end_io(struct bio *bio) + struct kiocb *iocb = dio->iocb; + ssize_t ret; + ++ WRITE_ONCE(iocb->private, NULL); ++ + if (likely(!dio->bio.bi_status)) { + ret = dio->size; + iocb->ki_pos += ret; +@@ -200,7 +192,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + bool do_poll = (iocb->ki_flags & IOCB_HIPRI); + bool is_read = (iov_iter_rw(iter) == READ), is_sync; + loff_t pos = iocb->ki_pos; +- blk_qc_t qc = BLK_QC_T_NONE; + int ret = 0; + + if ((pos | iov_iter_alignment(iter)) & +@@ -262,9 +253,9 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + if (!nr_pages) { + if (do_poll) + bio_set_polled(bio, iocb); +- qc = submit_bio(bio); ++ submit_bio(bio); + if (do_poll) +- WRITE_ONCE(iocb->ki_cookie, qc); ++ WRITE_ONCE(iocb->private, bio); + break; + } + if (!dio->multi_bio) { +@@ -297,7 +288,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + if (!READ_ONCE(dio->waiter)) 
+ break; + +- if (!do_poll || !blk_poll(bdev_get_queue(bdev), qc, 0)) ++ if (!do_poll || !bio_poll(bio, 0)) + blk_io_schedule(); + } + __set_current_state(TASK_RUNNING); +@@ -594,7 +585,7 @@ const struct file_operations def_blk_fops = { + .llseek = blkdev_llseek, + .read_iter = blkdev_read_iter, + .write_iter = blkdev_write_iter, +- .iopoll = blkdev_iopoll, ++ .iopoll = iocb_bio_iopoll, + .mmap = generic_file_mmap, + .fsync = blkdev_fsync, + .unlocked_ioctl = blkdev_ioctl, +diff --git a/drivers/block/brd.c b/drivers/block/brd.c +index 530b31240203..aa0472718dce 100644 +--- a/drivers/block/brd.c ++++ b/drivers/block/brd.c +@@ -282,7 +282,7 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page, + return err; + } + +-static blk_qc_t brd_submit_bio(struct bio *bio) ++static void brd_submit_bio(struct bio *bio) + { + struct brd_device *brd = bio->bi_bdev->bd_disk->private_data; + sector_t sector = bio->bi_iter.bi_sector; +@@ -299,16 +299,14 @@ static blk_qc_t brd_submit_bio(struct bio *bio) + + err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, + bio_op(bio), sector); +- if (err) +- goto io_error; ++ if (err) { ++ bio_io_error(bio); ++ return; ++ } + sector += len >> SECTOR_SHIFT; + } + + bio_endio(bio); +- return BLK_QC_T_NONE; +-io_error: +- bio_io_error(bio); +- return BLK_QC_T_NONE; + } + + static int brd_rw_page(struct block_device *bdev, sector_t sector, +diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h +index 5d9181382ce1..6674a0b88341 100644 +--- a/drivers/block/drbd/drbd_int.h ++++ b/drivers/block/drbd/drbd_int.h +@@ -1448,7 +1448,7 @@ extern void conn_free_crypto(struct drbd_connection *connection); + /* drbd_req */ + extern void do_submit(struct work_struct *ws); + extern void __drbd_make_request(struct drbd_device *, struct bio *); +-extern blk_qc_t drbd_submit_bio(struct bio *bio); ++void drbd_submit_bio(struct bio *bio); + extern int drbd_read_remote(struct drbd_device *device, struct drbd_request *req); + extern int is_valid_ar_handle(struct drbd_request *, sector_t); + +diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c +index 5ca233644d70..3235532ae077 100644 +--- a/drivers/block/drbd/drbd_req.c ++++ b/drivers/block/drbd/drbd_req.c +@@ -1596,7 +1596,7 @@ void do_submit(struct work_struct *ws) + } + } + +-blk_qc_t drbd_submit_bio(struct bio *bio) ++void drbd_submit_bio(struct bio *bio) + { + struct drbd_device *device = bio->bi_bdev->bd_disk->private_data; + +@@ -1609,7 +1609,6 @@ blk_qc_t drbd_submit_bio(struct bio *bio) + + inc_ap_bio(device); + __drbd_make_request(device, bio); +- return BLK_QC_T_NONE; + } + + static bool net_timeout_reached(struct drbd_request *net_req, +diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c +index 26798da661bd..b168ca25b6c9 100644 +--- a/drivers/block/n64cart.c ++++ b/drivers/block/n64cart.c +@@ -84,7 +84,7 @@ static bool n64cart_do_bvec(struct device *dev, struct bio_vec *bv, u32 pos) + return true; + } + +-static blk_qc_t n64cart_submit_bio(struct bio *bio) ++static void n64cart_submit_bio(struct bio *bio) + { + struct bio_vec bvec; + struct bvec_iter iter; +@@ -92,16 +92,14 @@ static blk_qc_t n64cart_submit_bio(struct bio *bio) + u32 pos = bio->bi_iter.bi_sector << SECTOR_SHIFT; + + bio_for_each_segment(bvec, bio, iter) { +- if (!n64cart_do_bvec(dev, &bvec, pos)) +- goto io_error; ++ if (!n64cart_do_bvec(dev, &bvec, pos)) { ++ bio_io_error(bio); ++ return; ++ } + pos += bvec.bv_len; + } + + bio_endio(bio); +- return BLK_QC_T_NONE; +-io_error: +- 
bio_io_error(bio); +- return BLK_QC_T_NONE; + } + + static const struct block_device_operations n64cart_fops = { +diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c +index 187d779c8ca0..e5cbcf582233 100644 +--- a/drivers/block/null_blk/main.c ++++ b/drivers/block/null_blk/main.c +@@ -1422,7 +1422,7 @@ static struct nullb_queue *nullb_to_queue(struct nullb *nullb) + return &nullb->queues[index]; + } + +-static blk_qc_t null_submit_bio(struct bio *bio) ++static void null_submit_bio(struct bio *bio) + { + sector_t sector = bio->bi_iter.bi_sector; + sector_t nr_sectors = bio_sectors(bio); +@@ -1434,7 +1434,6 @@ static blk_qc_t null_submit_bio(struct bio *bio) + cmd->bio = bio; + + null_handle_cmd(cmd, sector, nr_sectors, bio_op(bio)); +- return BLK_QC_T_NONE; + } + + static bool should_timeout_request(struct request *rq) +diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c +index 0f26b2510a75..cb52cce6fb03 100644 +--- a/drivers/block/pktcdvd.c ++++ b/drivers/block/pktcdvd.c +@@ -2400,7 +2400,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio) + } + } + +-static blk_qc_t pkt_submit_bio(struct bio *bio) ++static void pkt_submit_bio(struct bio *bio) + { + struct pktcdvd_device *pd; + char b[BDEVNAME_SIZE]; +@@ -2423,7 +2423,7 @@ static blk_qc_t pkt_submit_bio(struct bio *bio) + */ + if (bio_data_dir(bio) == READ) { + pkt_make_request_read(pd, bio); +- return BLK_QC_T_NONE; ++ return; + } + + if (!test_bit(PACKET_WRITABLE, &pd->flags)) { +@@ -2455,10 +2455,9 @@ static blk_qc_t pkt_submit_bio(struct bio *bio) + pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split); + } while (split != bio); + +- return BLK_QC_T_NONE; ++ return; + end_io: + bio_io_error(bio); +- return BLK_QC_T_NONE; + } + + static void pkt_init_queue(struct pktcdvd_device *pd) +diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c +index c7b19e128b03..d1ebf193cb9a 100644 +--- a/drivers/block/ps3vram.c ++++ b/drivers/block/ps3vram.c +@@ -578,7 +578,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev, + return next; + } + +-static blk_qc_t ps3vram_submit_bio(struct bio *bio) ++static void ps3vram_submit_bio(struct bio *bio) + { + struct ps3_system_bus_device *dev = bio->bi_bdev->bd_disk->private_data; + struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); +@@ -594,13 +594,11 @@ static blk_qc_t ps3vram_submit_bio(struct bio *bio) + spin_unlock_irq(&priv->lock); + + if (busy) +- return BLK_QC_T_NONE; ++ return; + + do { + bio = ps3vram_do_bio(dev, bio); + } while (bio); +- +- return BLK_QC_T_NONE; + } + + static const struct block_device_operations ps3vram_fops = { +diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c +index 1cc40b0ea761..268252380e88 100644 +--- a/drivers/block/rsxx/dev.c ++++ b/drivers/block/rsxx/dev.c +@@ -50,7 +50,7 @@ struct rsxx_bio_meta { + + static struct kmem_cache *bio_meta_pool; + +-static blk_qc_t rsxx_submit_bio(struct bio *bio); ++static void rsxx_submit_bio(struct bio *bio); + + /*----------------- Block Device Operations -----------------*/ + static int rsxx_blkdev_ioctl(struct block_device *bdev, +@@ -120,7 +120,7 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card, + } + } + +-static blk_qc_t rsxx_submit_bio(struct bio *bio) ++static void rsxx_submit_bio(struct bio *bio) + { + struct rsxx_cardinfo *card = bio->bi_bdev->bd_disk->private_data; + struct rsxx_bio_meta *bio_meta; +@@ -169,7 +169,7 @@ static blk_qc_t rsxx_submit_bio(struct bio *bio) + if (st) + goto queue_err; + +- 
return BLK_QC_T_NONE; ++ return; + + queue_err: + kmem_cache_free(bio_meta_pool, bio_meta); +@@ -177,7 +177,6 @@ static blk_qc_t rsxx_submit_bio(struct bio *bio) + if (st) + bio->bi_status = st; + bio_endio(bio); +- return BLK_QC_T_NONE; + } + + /*----------------- Device Setup -------------------*/ +diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c +index fcaf2750f68f..a68297fb51a2 100644 +--- a/drivers/block/zram/zram_drv.c ++++ b/drivers/block/zram/zram_drv.c +@@ -1598,22 +1598,18 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) + /* + * Handler function for all zram I/O requests. + */ +-static blk_qc_t zram_submit_bio(struct bio *bio) ++static void zram_submit_bio(struct bio *bio) + { + struct zram *zram = bio->bi_bdev->bd_disk->private_data; + + if (!valid_io_request(zram, bio->bi_iter.bi_sector, + bio->bi_iter.bi_size)) { + atomic64_inc(&zram->stats.invalid_io); +- goto error; ++ bio_io_error(bio); ++ return; + } + + __zram_make_request(zram, bio); +- return BLK_QC_T_NONE; +- +-error: +- bio_io_error(bio); +- return BLK_QC_T_NONE; + } + + static void zram_slot_free_notify(struct block_device *bdev, +diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c +index 6d1de889baeb..23b28edae90f 100644 +--- a/drivers/md/bcache/request.c ++++ b/drivers/md/bcache/request.c +@@ -1163,7 +1163,7 @@ static void quit_max_writeback_rate(struct cache_set *c, + + /* Cached devices - read & write stuff */ + +-blk_qc_t cached_dev_submit_bio(struct bio *bio) ++void cached_dev_submit_bio(struct bio *bio) + { + struct search *s; + struct block_device *orig_bdev = bio->bi_bdev; +@@ -1176,7 +1176,7 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio) + dc->io_disable)) { + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); +- return BLK_QC_T_NONE; ++ return; + } + + if (likely(d->c)) { +@@ -1222,8 +1222,6 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio) + } else + /* I/O request sent to backing device */ + detached_dev_do_request(d, bio, orig_bdev, start_time); +- +- return BLK_QC_T_NONE; + } + + static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, +@@ -1273,7 +1271,7 @@ static void flash_dev_nodata(struct closure *cl) + continue_at(cl, search_free, NULL); + } + +-blk_qc_t flash_dev_submit_bio(struct bio *bio) ++void flash_dev_submit_bio(struct bio *bio) + { + struct search *s; + struct closure *cl; +@@ -1282,7 +1280,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio) + if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) { + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); +- return BLK_QC_T_NONE; ++ return; + } + + s = search_alloc(bio, d, bio->bi_bdev, bio_start_io_acct(bio)); +@@ -1298,7 +1296,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio) + continue_at_nobarrier(&s->cl, + flash_dev_nodata, + bcache_wq); +- return BLK_QC_T_NONE; ++ return; + } else if (bio_data_dir(bio)) { + bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, + &KEY(d->id, bio->bi_iter.bi_sector, 0), +@@ -1314,7 +1312,6 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio) + } + + continue_at(cl, search_free, NULL); +- return BLK_QC_T_NONE; + } + + static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode, +diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h +index 82b38366a95d..38ab4856eaab 100644 +--- a/drivers/md/bcache/request.h ++++ b/drivers/md/bcache/request.h +@@ -37,10 +37,10 @@ unsigned int bch_get_congested(const struct cache_set *c); + void bch_data_insert(struct closure *cl); + + void 
bch_cached_dev_request_init(struct cached_dev *dc); +-blk_qc_t cached_dev_submit_bio(struct bio *bio); ++void cached_dev_submit_bio(struct bio *bio); + + void bch_flash_dev_request_init(struct bcache_device *d); +-blk_qc_t flash_dev_submit_bio(struct bio *bio); ++void flash_dev_submit_bio(struct bio *bio); + + extern struct kmem_cache *bch_search_cache; + +diff --git a/drivers/md/dm.c b/drivers/md/dm.c +index 76d9da49fda7..7870e6460633 100644 +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -1183,14 +1183,13 @@ static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch) + mutex_unlock(&md->swap_bios_lock); + } + +-static blk_qc_t __map_bio(struct dm_target_io *tio) ++static void __map_bio(struct dm_target_io *tio) + { + int r; + sector_t sector; + struct bio *clone = &tio->clone; + struct dm_io *io = tio->io; + struct dm_target *ti = tio->ti; +- blk_qc_t ret = BLK_QC_T_NONE; + + clone->bi_end_io = clone_endio; + +@@ -1226,7 +1225,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) + case DM_MAPIO_REMAPPED: + /* the bio has been remapped so dispatch it */ + trace_block_bio_remap(clone, bio_dev(io->orig_bio), sector); +- ret = submit_bio_noacct(clone); ++ submit_bio_noacct(clone); + break; + case DM_MAPIO_KILL: + if (unlikely(swap_bios_limit(ti, clone))) { +@@ -1248,8 +1247,6 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) + DMWARN("unimplemented target map return value: %d", r); + BUG(); + } +- +- return ret; + } + + static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len) +@@ -1336,7 +1333,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, + } + } + +-static blk_qc_t __clone_and_map_simple_bio(struct clone_info *ci, ++static void __clone_and_map_simple_bio(struct clone_info *ci, + struct dm_target_io *tio, unsigned *len) + { + struct bio *clone = &tio->clone; +@@ -1346,8 +1343,7 @@ static blk_qc_t __clone_and_map_simple_bio(struct clone_info *ci, + __bio_clone_fast(clone, ci->bio); + if (len) + bio_setup_sector(clone, ci->sector, *len); +- +- return __map_bio(tio); ++ __map_bio(tio); + } + + static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, +@@ -1361,7 +1357,7 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, + + while ((bio = bio_list_pop(&blist))) { + tio = container_of(bio, struct dm_target_io, clone); +- (void) __clone_and_map_simple_bio(ci, tio, len); ++ __clone_and_map_simple_bio(ci, tio, len); + } + } + +@@ -1405,7 +1401,7 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, + free_tio(tio); + return r; + } +- (void) __map_bio(tio); ++ __map_bio(tio); + + return 0; + } +@@ -1520,11 +1516,10 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md, + /* + * Entry point to split a bio into clones and submit them to the targets. 
+ */ +-static blk_qc_t __split_and_process_bio(struct mapped_device *md, ++static void __split_and_process_bio(struct mapped_device *md, + struct dm_table *map, struct bio *bio) + { + struct clone_info ci; +- blk_qc_t ret = BLK_QC_T_NONE; + int error = 0; + + init_clone_info(&ci, md, map, bio); +@@ -1567,19 +1562,17 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, + + bio_chain(b, bio); + trace_block_split(b, bio->bi_iter.bi_sector); +- ret = submit_bio_noacct(bio); ++ submit_bio_noacct(bio); + } + } + + /* drop the extra reference count */ + dm_io_dec_pending(ci.io, errno_to_blk_status(error)); +- return ret; + } + +-static blk_qc_t dm_submit_bio(struct bio *bio) ++static void dm_submit_bio(struct bio *bio) + { + struct mapped_device *md = bio->bi_bdev->bd_disk->private_data; +- blk_qc_t ret = BLK_QC_T_NONE; + int srcu_idx; + struct dm_table *map; + +@@ -1609,10 +1602,9 @@ static blk_qc_t dm_submit_bio(struct bio *bio) + if (is_abnormal_io(bio)) + blk_queue_split(&bio); + +- ret = __split_and_process_bio(md, map, bio); ++ __split_and_process_bio(md, map, bio); + out: + dm_put_live_table(md, srcu_idx); +- return ret; + } + + /*----------------------------------------------------------------- +diff --git a/drivers/md/md.c b/drivers/md/md.c +index ec09083ff0ef..22310d5d8d41 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -443,19 +443,19 @@ void md_handle_request(struct mddev *mddev, struct bio *bio) + } + EXPORT_SYMBOL(md_handle_request); + +-static blk_qc_t md_submit_bio(struct bio *bio) ++static void md_submit_bio(struct bio *bio) + { + const int rw = bio_data_dir(bio); + struct mddev *mddev = bio->bi_bdev->bd_disk->private_data; + + if (mddev == NULL || mddev->pers == NULL) { + bio_io_error(bio); +- return BLK_QC_T_NONE; ++ return; + } + + if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) { + bio_io_error(bio); +- return BLK_QC_T_NONE; ++ return; + } + + blk_queue_split(&bio); +@@ -464,15 +464,13 @@ static blk_qc_t md_submit_bio(struct bio *bio) + if (bio_sectors(bio) != 0) + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); +- return BLK_QC_T_NONE; ++ return; + } + + /* bio could be mergeable after passing to underlayer */ + bio->bi_opf &= ~REQ_NOMERGE; + + md_handle_request(mddev, bio); +- +- return BLK_QC_T_NONE; + } + + /* mddev_suspend makes sure no new requests are submitted +diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c +index 088d3dd6f6fa..b6c6866f9259 100644 +--- a/drivers/nvdimm/blk.c ++++ b/drivers/nvdimm/blk.c +@@ -162,7 +162,7 @@ static int nsblk_do_bvec(struct nd_namespace_blk *nsblk, + return err; + } + +-static blk_qc_t nd_blk_submit_bio(struct bio *bio) ++static void nd_blk_submit_bio(struct bio *bio) + { + struct bio_integrity_payload *bip; + struct nd_namespace_blk *nsblk = bio->bi_bdev->bd_disk->private_data; +@@ -173,7 +173,7 @@ static blk_qc_t nd_blk_submit_bio(struct bio *bio) + bool do_acct; + + if (!bio_integrity_prep(bio)) +- return BLK_QC_T_NONE; ++ return; + + bip = bio_integrity(bio); + rw = bio_data_dir(bio); +@@ -199,7 +199,6 @@ static blk_qc_t nd_blk_submit_bio(struct bio *bio) + bio_end_io_acct(bio, start); + + bio_endio(bio); +- return BLK_QC_T_NONE; + } + + static int nsblk_rw_bytes(struct nd_namespace_common *ndns, +diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c +index 92dec4952297..4295fa809420 100644 +--- a/drivers/nvdimm/btt.c ++++ b/drivers/nvdimm/btt.c +@@ -1440,7 +1440,7 @@ static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, + return ret; + } + +-static 
blk_qc_t btt_submit_bio(struct bio *bio) ++static void btt_submit_bio(struct bio *bio) + { + struct bio_integrity_payload *bip = bio_integrity(bio); + struct btt *btt = bio->bi_bdev->bd_disk->private_data; +@@ -1451,7 +1451,7 @@ static blk_qc_t btt_submit_bio(struct bio *bio) + bool do_acct; + + if (!bio_integrity_prep(bio)) +- return BLK_QC_T_NONE; ++ return; + + do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue); + if (do_acct) +@@ -1483,7 +1483,6 @@ static blk_qc_t btt_submit_bio(struct bio *bio) + bio_end_io_acct(bio, start); + + bio_endio(bio); +- return BLK_QC_T_NONE; + } + + static int btt_rw_page(struct block_device *bdev, sector_t sector, +diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c +index ef4950f80832..a67a3ad1d413 100644 +--- a/drivers/nvdimm/pmem.c ++++ b/drivers/nvdimm/pmem.c +@@ -190,7 +190,7 @@ static blk_status_t pmem_do_write(struct pmem_device *pmem, + return rc; + } + +-static blk_qc_t pmem_submit_bio(struct bio *bio) ++static void pmem_submit_bio(struct bio *bio) + { + int ret = 0; + blk_status_t rc = 0; +@@ -229,7 +229,6 @@ static blk_qc_t pmem_submit_bio(struct bio *bio) + bio->bi_status = errno_to_blk_status(ret); + + bio_endio(bio); +- return BLK_QC_T_NONE; + } + + static int pmem_rw_page(struct block_device *bdev, sector_t sector, +diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c +index fba06618c6c2..ab78aa5d28c6 100644 +--- a/drivers/nvme/host/multipath.c ++++ b/drivers/nvme/host/multipath.c +@@ -312,12 +312,11 @@ static bool nvme_available_path(struct nvme_ns_head *head) + return false; + } + +-static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) ++static void nvme_ns_head_submit_bio(struct bio *bio) + { + struct nvme_ns_head *head = bio->bi_bdev->bd_disk->private_data; + struct device *dev = disk_to_dev(head->disk); + struct nvme_ns *ns; +- blk_qc_t ret = BLK_QC_T_NONE; + int srcu_idx; + + /* +@@ -334,7 +333,7 @@ static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) + bio->bi_opf |= REQ_NVME_MPATH; + trace_block_bio_remap(bio, disk_devt(ns->head->disk), + bio->bi_iter.bi_sector); +- ret = submit_bio_noacct(bio); ++ submit_bio_noacct(bio); + } else if (nvme_available_path(head)) { + dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n"); + +@@ -349,7 +348,6 @@ static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) + } + + srcu_read_unlock(&head->srcu, srcu_idx); +- return ret; + } + + static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode) +diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c +index 5be3d1c39a78..59e513d34b0f 100644 +--- a/drivers/s390/block/dcssblk.c ++++ b/drivers/s390/block/dcssblk.c +@@ -30,7 +30,7 @@ + + static int dcssblk_open(struct block_device *bdev, fmode_t mode); + static void dcssblk_release(struct gendisk *disk, fmode_t mode); +-static blk_qc_t dcssblk_submit_bio(struct bio *bio); ++static void dcssblk_submit_bio(struct bio *bio); + static long dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, + long nr_pages, void **kaddr, pfn_t *pfn); + +@@ -854,7 +854,7 @@ dcssblk_release(struct gendisk *disk, fmode_t mode) + up_write(&dcssblk_devices_sem); + } + +-static blk_qc_t ++static void + dcssblk_submit_bio(struct bio *bio) + { + struct dcssblk_dev_info *dev_info; +@@ -907,10 +907,9 @@ dcssblk_submit_bio(struct bio *bio) + bytes_done += bvec.bv_len; + } + bio_endio(bio); +- return BLK_QC_T_NONE; ++ return; + fail: + bio_io_error(bio); +- return BLK_QC_T_NONE; + } + + static long +diff --git a/fs/btrfs/inode.c 
b/fs/btrfs/inode.c +index 4a9077c52444..04090ba0ef73 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -8248,7 +8248,7 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio, + return dip; + } + +-static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter, ++static void btrfs_submit_direct(const struct iomap_iter *iter, + struct bio *dio_bio, loff_t file_offset) + { + struct inode *inode = iter->inode; +@@ -8278,7 +8278,7 @@ static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter, + } + dio_bio->bi_status = BLK_STS_RESOURCE; + bio_endio(dio_bio); +- return BLK_QC_T_NONE; ++ return; + } + + if (!write) { +@@ -8372,15 +8372,13 @@ static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter, + + free_extent_map(em); + } while (submit_len > 0); +- return BLK_QC_T_NONE; ++ return; + + out_err_em: + free_extent_map(em); + out_err: + dip->dio_bio->bi_status = status; + btrfs_dio_private_put(dip); +- +- return BLK_QC_T_NONE; + } + + const struct iomap_ops btrfs_dio_iomap_ops = { +diff --git a/fs/ext4/file.c b/fs/ext4/file.c +index ac0e11bbb445..9c5559faacda 100644 +--- a/fs/ext4/file.c ++++ b/fs/ext4/file.c +@@ -915,7 +915,7 @@ const struct file_operations ext4_file_operations = { + .llseek = ext4_llseek, + .read_iter = ext4_file_read_iter, + .write_iter = ext4_file_write_iter, +- .iopoll = iomap_dio_iopoll, ++ .iopoll = iocb_bio_iopoll, + .unlocked_ioctl = ext4_ioctl, + #ifdef CONFIG_COMPAT + .compat_ioctl = ext4_compat_ioctl, +diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c +index c559827cb6f9..635f0e3f10ec 100644 +--- a/fs/gfs2/file.c ++++ b/fs/gfs2/file.c +@@ -1353,7 +1353,7 @@ const struct file_operations gfs2_file_fops = { + .llseek = gfs2_llseek, + .read_iter = gfs2_file_read_iter, + .write_iter = gfs2_file_write_iter, +- .iopoll = iomap_dio_iopoll, ++ .iopoll = iocb_bio_iopoll, + .unlocked_ioctl = gfs2_ioctl, + .compat_ioctl = gfs2_compat_ioctl, + .mmap = gfs2_mmap, +@@ -1386,7 +1386,7 @@ const struct file_operations gfs2_file_fops_nolock = { + .llseek = gfs2_llseek, + .read_iter = gfs2_file_read_iter, + .write_iter = gfs2_file_write_iter, +- .iopoll = iomap_dio_iopoll, ++ .iopoll = iocb_bio_iopoll, + .unlocked_ioctl = gfs2_ioctl, + .compat_ioctl = gfs2_compat_ioctl, + .mmap = gfs2_mmap, +diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c +index 236aba256cd1..8efab177011d 100644 +--- a/fs/iomap/direct-io.c ++++ b/fs/iomap/direct-io.c +@@ -38,8 +38,7 @@ struct iomap_dio { + struct { + struct iov_iter *iter; + struct task_struct *waiter; +- struct request_queue *last_queue; +- blk_qc_t cookie; ++ struct bio *poll_bio; + } submit; + + /* used for aio completion: */ +@@ -49,29 +48,20 @@ struct iomap_dio { + }; + }; + +-int iomap_dio_iopoll(struct kiocb *kiocb, unsigned int flags) +-{ +- struct request_queue *q = READ_ONCE(kiocb->private); +- +- if (!q) +- return 0; +- return blk_poll(q, READ_ONCE(kiocb->ki_cookie), flags); +-} +-EXPORT_SYMBOL_GPL(iomap_dio_iopoll); +- + static void iomap_dio_submit_bio(const struct iomap_iter *iter, + struct iomap_dio *dio, struct bio *bio, loff_t pos) + { + atomic_inc(&dio->ref); + +- if (dio->iocb->ki_flags & IOCB_HIPRI) ++ if (dio->iocb->ki_flags & IOCB_HIPRI) { + bio_set_polled(bio, dio->iocb); ++ dio->submit.poll_bio = bio; ++ } + +- dio->submit.last_queue = bdev_get_queue(iter->iomap.bdev); + if (dio->dops && dio->dops->submit_io) +- dio->submit.cookie = dio->dops->submit_io(iter, bio, pos); ++ dio->dops->submit_io(iter, bio, pos); + else +- dio->submit.cookie = submit_bio(bio); ++ 
submit_bio(bio); + } + + ssize_t iomap_dio_complete(struct iomap_dio *dio) +@@ -164,9 +154,11 @@ static void iomap_dio_bio_end_io(struct bio *bio) + } else if (dio->flags & IOMAP_DIO_WRITE) { + struct inode *inode = file_inode(dio->iocb->ki_filp); + ++ WRITE_ONCE(dio->iocb->private, NULL); + INIT_WORK(&dio->aio.work, iomap_dio_complete_work); + queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work); + } else { ++ WRITE_ONCE(dio->iocb->private, NULL); + iomap_dio_complete_work(&dio->aio.work); + } + } +@@ -497,8 +489,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + + dio->submit.iter = iter; + dio->submit.waiter = current; +- dio->submit.cookie = BLK_QC_T_NONE; +- dio->submit.last_queue = NULL; ++ dio->submit.poll_bio = NULL; + + if (iov_iter_rw(iter) == READ) { + if (iomi.pos >= dio->i_size) +@@ -611,8 +602,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + if (dio->flags & IOMAP_DIO_WRITE_FUA) + dio->flags &= ~IOMAP_DIO_NEED_SYNC; + +- WRITE_ONCE(iocb->ki_cookie, dio->submit.cookie); +- WRITE_ONCE(iocb->private, dio->submit.last_queue); ++ WRITE_ONCE(iocb->private, dio->submit.poll_bio); + + /* + * We are about to drop our additional submission reference, which +@@ -639,10 +629,8 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + if (!READ_ONCE(dio->submit.waiter)) + break; + +- if (!(iocb->ki_flags & IOCB_HIPRI) || +- !dio->submit.last_queue || +- !blk_poll(dio->submit.last_queue, +- dio->submit.cookie, 0)) ++ if (!dio->submit.poll_bio || ++ !bio_poll(dio->submit.poll_bio, 0)) + blk_io_schedule(); + } + __set_current_state(TASK_RUNNING); +diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c +index 7aa943edfc02..62e7fbe4e54c 100644 +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -1452,7 +1452,7 @@ const struct file_operations xfs_file_operations = { + .write_iter = xfs_file_write_iter, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, +- .iopoll = iomap_dio_iopoll, ++ .iopoll = iocb_bio_iopoll, + .unlocked_ioctl = xfs_file_ioctl, + #ifdef CONFIG_COMPAT + .compat_ioctl = xfs_file_compat_ioctl, +diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c +index ddc346a9df9b..3ce5f47338cb 100644 +--- a/fs/zonefs/super.c ++++ b/fs/zonefs/super.c +@@ -1128,7 +1128,7 @@ static const struct file_operations zonefs_file_operations = { + .write_iter = zonefs_file_write_iter, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, +- .iopoll = iomap_dio_iopoll, ++ .iopoll = iocb_bio_iopoll, + }; + + static struct kmem_cache *zonefs_inode_cachep; +diff --git a/include/linux/bio.h b/include/linux/bio.h +index c7a2d880e927..62d684b7dd4c 100644 +--- a/include/linux/bio.h ++++ b/include/linux/bio.h +@@ -349,7 +349,7 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs) + return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set); + } + +-extern blk_qc_t submit_bio(struct bio *); ++void submit_bio(struct bio *bio); + + extern void bio_endio(struct bio *); + +diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h +index 2219e9277118..a9c1d0882550 100644 +--- a/include/linux/blk-mq.h ++++ b/include/linux/blk-mq.h +@@ -359,9 +359,9 @@ struct blk_mq_hw_ctx { + /** @kobj: Kernel object for sysfs. */ + struct kobject kobj; + +- /** @poll_considered: Count times blk_poll() was called. */ ++ /** @poll_considered: Count times blk_mq_poll() was called. */ + unsigned long poll_considered; +- /** @poll_invoked: Count how many requests blk_poll() polled. 
*/ ++ /** @poll_invoked: Count how many requests blk_mq_poll() polled. */ + unsigned long poll_invoked; + /** @poll_success: Count how many polled requests were completed. */ + unsigned long poll_success; +@@ -815,16 +815,6 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) + for ((i) = 0; (i) < (hctx)->nr_ctx && \ + ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) + +-static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, +- struct request *rq) +-{ +- if (rq->tag != -1) +- return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT); +- +- return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) | +- BLK_QC_T_INTERNAL; +-} +- + static inline void blk_mq_cleanup_rq(struct request *rq) + { + if (rq->q->mq_ops->cleanup_rq) +@@ -843,7 +833,6 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, + rq->rq_disk = bio->bi_bdev->bd_disk; + } + +-blk_qc_t blk_mq_submit_bio(struct bio *bio); + void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, + struct lock_class_key *key); + +diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h +index f8b9fce68834..72736b4c057c 100644 +--- a/include/linux/blk_types.h ++++ b/include/linux/blk_types.h +@@ -208,6 +208,9 @@ static inline void bio_issue_init(struct bio_issue *issue, + ((u64)size << BIO_ISSUE_SIZE_SHIFT)); + } + ++typedef unsigned int blk_qc_t; ++#define BLK_QC_T_NONE -1U ++ + /* + * main unit of I/O for the block layer and lower layers (ie drivers and + * stacking drivers) +@@ -227,8 +230,8 @@ struct bio { + + struct bvec_iter bi_iter; + ++ blk_qc_t bi_cookie; + bio_end_io_t *bi_end_io; +- + void *bi_private; + #ifdef CONFIG_BLK_CGROUP + /* +@@ -384,7 +387,7 @@ enum req_flag_bits { + /* command specific flags for REQ_OP_WRITE_ZEROES: */ + __REQ_NOUNMAP, /* do not free blocks when zeroing */ + +- __REQ_POLLED, /* caller polls for completion using blk_poll */ ++ __REQ_POLLED, /* caller polls for completion using bio_poll */ + + /* for driver use */ + __REQ_DRV, +@@ -495,11 +498,6 @@ static inline int op_stat_group(unsigned int op) + return op_is_write(op); + } + +-typedef unsigned int blk_qc_t; +-#define BLK_QC_T_NONE -1U +-#define BLK_QC_T_SHIFT 16 +-#define BLK_QC_T_INTERNAL (1U << 31) +- + struct blk_rq_stat { + u64 mean; + u64 min; +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index 2b80c98fc373..2a8689e949b4 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -25,6 +25,7 @@ struct request; + struct sg_io_hdr; + struct blkcg_gq; + struct blk_flush_queue; ++struct kiocb; + struct pr_ops; + struct rq_qos; + struct blk_queue_stats; +@@ -550,7 +551,7 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) + + extern int blk_register_queue(struct gendisk *disk); + extern void blk_unregister_queue(struct gendisk *disk); +-blk_qc_t submit_bio_noacct(struct bio *bio); ++void submit_bio_noacct(struct bio *bio); + + extern int blk_lld_busy(struct request_queue *q); + extern void blk_queue_split(struct bio **); +@@ -568,7 +569,8 @@ blk_status_t errno_to_blk_status(int errno); + #define BLK_POLL_ONESHOT (1 << 0) + /* do not sleep to wait for the expected completion time */ + #define BLK_POLL_NOSLEEP (1 << 1) +-int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags); ++int bio_poll(struct bio *bio, unsigned int flags); ++int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags); + + static inline struct request_queue *bdev_get_queue(struct block_device *bdev) + { +@@ -1176,7 +1178,7 @@ static inline void blk_ksm_unregister(struct 
request_queue *q) { } + + + struct block_device_operations { +- blk_qc_t (*submit_bio) (struct bio *bio); ++ void (*submit_bio)(struct bio *bio); + int (*open) (struct block_device *, fmode_t); + void (*release) (struct gendisk *, fmode_t); + int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); +diff --git a/include/linux/fs.h b/include/linux/fs.h +index c443cddf414f..f595f4097cb7 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -334,11 +334,7 @@ struct kiocb { + int ki_flags; + u16 ki_hint; + u16 ki_ioprio; /* See linux/ioprio.h */ +- union { +- unsigned int ki_cookie; /* for ->iopoll */ +- struct wait_page_queue *ki_waitq; /* for async buffered IO */ +- }; +- ++ struct wait_page_queue *ki_waitq; /* for async buffered IO */ + randomized_struct_fields_end + }; + +diff --git a/include/linux/iomap.h b/include/linux/iomap.h +index 1e86b65567c2..63f4ea4dac9b 100644 +--- a/include/linux/iomap.h ++++ b/include/linux/iomap.h +@@ -313,8 +313,8 @@ int iomap_writepages(struct address_space *mapping, + struct iomap_dio_ops { + int (*end_io)(struct kiocb *iocb, ssize_t size, int error, + unsigned flags); +- blk_qc_t (*submit_io)(const struct iomap_iter *iter, struct bio *bio, +- loff_t file_offset); ++ void (*submit_io)(const struct iomap_iter *iter, struct bio *bio, ++ loff_t file_offset); + }; + + /* +@@ -337,7 +337,6 @@ struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + const struct iomap_ops *ops, const struct iomap_dio_ops *dops, + unsigned int dio_flags); + ssize_t iomap_dio_complete(struct iomap_dio *dio); +-int iomap_dio_iopoll(struct kiocb *kiocb, unsigned int flags); + + #ifdef CONFIG_SWAP + struct file; +diff --git a/mm/page_io.c b/mm/page_io.c +index ed2eded74f3a..a68faab5b310 100644 +--- a/mm/page_io.c ++++ b/mm/page_io.c +@@ -358,8 +358,6 @@ int swap_readpage(struct page *page, bool synchronous) + struct bio *bio; + int ret = 0; + struct swap_info_struct *sis = page_swap_info(page); +- blk_qc_t qc; +- struct gendisk *disk; + unsigned long pflags; + + VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page); +@@ -409,8 +407,6 @@ int swap_readpage(struct page *page, bool synchronous) + bio->bi_iter.bi_sector = swap_page_sector(page); + bio->bi_end_io = end_swap_bio_read; + bio_add_page(bio, page, thp_size(page), 0); +- +- disk = bio->bi_bdev->bd_disk; + /* + * Keep this task valid during swap readpage because the oom killer may + * attempt to access it in the page fault retry time check. +@@ -422,13 +418,13 @@ int swap_readpage(struct page *page, bool synchronous) + } + count_vm_event(PSWPIN); + bio_get(bio); +- qc = submit_bio(bio); ++ submit_bio(bio); + while (synchronous) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (!READ_ONCE(bio->bi_private)) + break; + +- if (!blk_poll(disk->queue, qc, 0)) ++ if (!bio_poll(bio, 0)) + blk_io_schedule(); + } + __set_current_state(TASK_RUNNING); +-- +2.35.3 + diff --git a/patches.suse/block-sx8-add-error-handling-support-for-add_disk.patch b/patches.suse/block-sx8-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..bf569ce --- /dev/null +++ b/patches.suse/block-sx8-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,78 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:01:55 -0700 +Subject: [PATCH] block/sx8: add error handling support for add_disk() +Git-commit: 637208e74a861d993c3a8eea3f9f1df4415930e0 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. 
Now that this is fixed, use the shiny new +error handling. + +A completion is used to notify the initial probe what is +happening and so we must defer error handling on completion. +Do this by remembering the error and using the shared cleanup +function. + +The tags are shared and so are handled later for the +driver already. + +Signed-off-by: Luis Chamberlain +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/sx8.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c +index 420cd952ddc4..1c79248c4826 100644 +--- a/drivers/block/sx8.c ++++ b/drivers/block/sx8.c +@@ -297,6 +297,7 @@ struct carm_host { + + struct work_struct fsm_task; + ++ int probe_err; + struct completion probe_comp; + }; + +@@ -1181,8 +1182,11 @@ static void carm_fsm_task (struct work_struct *work) + struct gendisk *disk = port->disk; + + set_capacity(disk, port->capacity); +- add_disk(disk); +- activated++; ++ host->probe_err = add_disk(disk); ++ if (!host->probe_err) ++ activated++; ++ else ++ break; + } + + printk(KERN_INFO DRV_NAME "(%s): %d ports activated\n", +@@ -1192,11 +1196,9 @@ static void carm_fsm_task (struct work_struct *work) + reschedule = 1; + break; + } +- + case HST_PROBE_FINISHED: + complete(&host->probe_comp); + break; +- + case HST_ERROR: + /* FIXME: TODO */ + break; +@@ -1507,7 +1509,10 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) + goto err_out_free_irq; + + DPRINTK("waiting for probe_comp\n"); ++ host->probe_err = -ENODEV; + wait_for_completion(&host->probe_comp); ++ if (host->probe_err) ++ goto err_out_free_irq; + + printk(KERN_INFO "%s: pci %s, ports %d, io %llx, irq %u, major %d\n", + host->name, pci_name(pdev), (int) CARM_MAX_PORTS, +-- +2.35.3 + diff --git a/patches.suse/block-turn-macro-helpers-into-inline-functions.patch b/patches.suse/block-turn-macro-helpers-into-inline-functions.patch new file mode 100644 index 0000000..5456590 --- /dev/null +++ b/patches.suse/block-turn-macro-helpers-into-inline-functions.patch @@ -0,0 +1,72 @@ +From: Pavel Begunkov +Date: Tue, 19 Oct 2021 22:24:10 +0100 +Subject: [PATCH] block: turn macro helpers into inline functions +Git-commit: cf6d6238cdd319eca404756dee05bf55a748b6a9 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Replace bio_set_dev() with an identical inline helper and move it +further to fix a dependency problem with bio_associate_blkg(). Do the +same for bio_copy_dev().
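An editorial aside on what the inline conversion buys (not part of the upstream commit; next_bdev() below is a hypothetical helper used only for illustration): the old macro expanded its bdev argument textually in two places, once in the != comparison and once in the assignment, so a call such as

	bio_set_dev(bio, next_bdev());	/* hypothetical: next_bdev() would run twice */

executed any side effect of the argument twice. The inline function evaluates each argument exactly once and type-checks both of them, and it only needs bio_associate_blkg() declared where the inline is defined, rather than at every expansion site — the dependency problem mentioned above.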
+ +Reviewed-by: Christoph Hellwig +Signed-off-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + include/linux/bio.h | 32 ++++++++++++++++---------------- + 1 file changed, 16 insertions(+), 16 deletions(-) + +diff --git a/include/linux/bio.h b/include/linux/bio.h +index 9538f20ffaa5..b12453d7b8a8 100644 +--- a/include/linux/bio.h ++++ b/include/linux/bio.h +@@ -430,22 +430,6 @@ void zero_fill_bio(struct bio *bio); + + extern const char *bio_devname(struct bio *bio, char *buffer); + +-#define bio_set_dev(bio, bdev) \ +-do { \ +- bio_clear_flag(bio, BIO_REMAPPED); \ +- if ((bio)->bi_bdev != (bdev)) \ +- bio_clear_flag(bio, BIO_THROTTLED); \ +- (bio)->bi_bdev = (bdev); \ +- bio_associate_blkg(bio); \ +-} while (0) +- +-#define bio_copy_dev(dst, src) \ +-do { \ +- bio_clear_flag(dst, BIO_REMAPPED); \ +- (dst)->bi_bdev = (src)->bi_bdev; \ +- bio_clone_blkg_association(dst, src); \ +-} while (0) +- + #define bio_dev(bio) \ + disk_devt((bio)->bi_bdev->bd_disk) + +@@ -463,6 +447,22 @@ static inline void bio_clone_blkg_association(struct bio *dst, + struct bio *src) { } + #endif /* CONFIG_BLK_CGROUP */ + ++static inline void bio_set_dev(struct bio *bio, struct block_device *bdev) ++{ ++ bio_clear_flag(bio, BIO_REMAPPED); ++ if (bio->bi_bdev != bdev) ++ bio_clear_flag(bio, BIO_THROTTLED); ++ bio->bi_bdev = bdev; ++ bio_associate_blkg(bio); ++} ++ ++static inline void bio_copy_dev(struct bio *dst, struct bio *src) ++{ ++ bio_clear_flag(dst, BIO_REMAPPED); ++ dst->bi_bdev = src->bi_bdev; ++ bio_clone_blkg_association(dst, src); ++} ++ + /* + * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. + * +-- +2.35.3 + diff --git a/patches.suse/block-use-SLAB_TYPESAFE_BY_RCU-for-the-bio-slab.patch b/patches.suse/block-use-SLAB_TYPESAFE_BY_RCU-for-the-bio-slab.patch new file mode 100644 index 0000000..78e9360 --- /dev/null +++ b/patches.suse/block-use-SLAB_TYPESAFE_BY_RCU-for-the-bio-slab.patch @@ -0,0 +1,38 @@ +From: Christoph Hellwig +Date: Tue, 12 Oct 2021 13:12:22 +0200 +Subject: [PATCH] block: use SLAB_TYPESAFE_BY_RCU for the bio slab +Git-commit: 1a7e76e4f130332b5d3b0c72c4f664e59deb1239 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +This flag ensures that the pages will not be reused for non-bio +allocations before the end of an RCU grace period. With that we can +safely use an RCU lookup for bio polling as long as we are fine with +occasionally polling the wrong device.
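A minimal sketch of the RCU lookup pattern this enables (editorial illustration, not from the patch; bio_slot stands for some assumed RCU-published pointer into this slab):

	struct bio *bio;

	rcu_read_lock();
	bio = rcu_dereference(bio_slot);	/* may race with the bio being freed */
	if (bio)
		bio_poll(bio, 0);		/* worst case polls a recycled bio, i.e. the wrong device */
	rcu_read_unlock();

Type stability means the dereference can never touch memory handed back to the page allocator within the grace period, so the race is benign rather than a use-after-free.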
+ +Signed-off-by: Christoph Hellwig +Tested-by: Mark Wunderlich +Link: https://lore.kernel.org/r/20211012111226.760968-13-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/bio.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/block/bio.c b/block/bio.c +index d5120451c36a..df45f4b996ac 100644 +--- a/block/bio.c ++++ b/block/bio.c +@@ -87,7 +87,8 @@ static struct bio_slab *create_bio_slab(unsigned int size) + + snprintf(bslab->name, sizeof(bslab->name), "bio-%d", size); + bslab->slab = kmem_cache_create(bslab->name, size, +- ARCH_KMALLOC_MINALIGN, SLAB_HWCACHE_ALIGN, NULL); ++ ARCH_KMALLOC_MINALIGN, ++ SLAB_HWCACHE_ALIGN | SLAB_TYPESAFE_BY_RCU, NULL); + if (!bslab->slab) + goto fail_alloc_slab; + +-- +2.35.3 + diff --git a/patches.suse/block-use-bdev_get_queue-in-bdev.c.patch b/patches.suse/block-use-bdev_get_queue-in-bdev.c.patch new file mode 100644 index 0000000..3ce660e --- /dev/null +++ b/patches.suse/block-use-bdev_get_queue-in-bdev.c.patch @@ -0,0 +1,58 @@ +From: Pavel Begunkov +Date: Thu, 14 Oct 2021 15:03:27 +0100 +Subject: [PATCH] block: use bdev_get_queue() in bdev.c +Git-commit: 025a38651ba6178a709ecf351ca90d11fa2908cd +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Convert bdev->bd_disk->queue to bdev_get_queue(), it uses a cached +queue pointer and so is faster. + +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/a352936ce5d9ac719645b1e29b173d931ebcdc02.1634219547.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/bdev.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/block/bdev.c b/block/bdev.c +index fed8d0c041c7..cff0bb3a4578 100644 +--- a/block/bdev.c ++++ b/block/bdev.c +@@ -327,12 +327,12 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, + if (!ops->rw_page || bdev_get_integrity(bdev)) + return result; + +- result = blk_queue_enter(bdev->bd_disk->queue, 0); ++ result = blk_queue_enter(bdev_get_queue(bdev), 0); + if (result) + return result; + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, + REQ_OP_READ); +- blk_queue_exit(bdev->bd_disk->queue); ++ blk_queue_exit(bdev_get_queue(bdev)); + return result; + } + +@@ -363,7 +363,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, + + if (!ops->rw_page || bdev_get_integrity(bdev)) + return -EOPNOTSUPP; +- result = blk_queue_enter(bdev->bd_disk->queue, 0); ++ result = blk_queue_enter(bdev_get_queue(bdev), 0); + if (result) + return result; + +@@ -376,7 +376,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, + + clean_page_buffers(page); + unlock_page(page); + } +- blk_queue_exit(bdev->bd_disk->queue); ++ blk_queue_exit(bdev_get_queue(bdev)); + return result; + } + +-- +2.35.3 + diff --git a/patches.suse/block-use-bdev_get_queue-in-bio.c.patch b/patches.suse/block-use-bdev_get_queue-in-bio.c.patch new file mode 100644 index 0000000..4088b0b --- /dev/null +++ b/patches.suse/block-use-bdev_get_queue-in-bio.c.patch @@ -0,0 +1,65 @@ +From: Pavel Begunkov +Date: Thu, 14 Oct 2021 15:03:28 +0100 +Subject: [PATCH] block: use bdev_get_queue() in bio.c +Git-commit: 3caee4634be68e755d2fb130962f1623661dbd5b +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Convert bdev->bd_disk->queue to bdev_get_queue(), it uses a cached +queue pointer and so is faster.
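The conversion these bdev_get_queue() patches apply throughout, shown side by side (editorial illustration; the "cached" claim is the commit message's, referring to the queue pointer kept in struct block_device):

	struct request_queue *q;

	q = bio->bi_bdev->bd_disk->queue;	/* old: two dependent pointer loads */
	q = bdev_get_queue(bio->bi_bdev);	/* new: one load through the cached pointer */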
+ +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/85c36ea784d285a5075baa10049e6b59e15fb484.1634219547.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/bio.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/block/bio.c b/block/bio.c +index a3c9ff23a036..5fb8092577bf 100644 +--- a/block/bio.c ++++ b/block/bio.c +@@ -965,7 +965,7 @@ EXPORT_SYMBOL(bio_add_pc_page); + int bio_add_zone_append_page(struct bio *bio, struct page *page, + unsigned int len, unsigned int offset) + { +- struct request_queue *q = bio->bi_bdev->bd_disk->queue; ++ struct request_queue *q = bdev_get_queue(bio->bi_bdev); + bool same_page = false; + + if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND)) +@@ -1070,7 +1070,7 @@ static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter) + + static int bio_iov_bvec_set_append(struct bio *bio, struct iov_iter *iter) + { +- struct request_queue *q = bio->bi_bdev->bd_disk->queue; ++ struct request_queue *q = bdev_get_queue(bio->bi_bdev); + struct iov_iter i = *iter; + + iov_iter_truncate(&i, queue_max_zone_append_sectors(q) << 9); +@@ -1148,7 +1148,7 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter) + { + unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; + unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; +- struct request_queue *q = bio->bi_bdev->bd_disk->queue; ++ struct request_queue *q = bdev_get_queue(bio->bi_bdev); + unsigned int max_append_sectors = queue_max_zone_append_sectors(q); + struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; + struct page **pages = (struct page **)bv; +@@ -1485,10 +1485,10 @@ void bio_endio(struct bio *bio) + return; + + if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED)) +- rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio); ++ rq_qos_done_bio(bdev_get_queue(bio->bi_bdev), bio); + + if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { +- trace_block_bio_complete(bio->bi_bdev->bd_disk->queue, bio); ++ trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio); + bio_clear_flag(bio, BIO_TRACE_COMPLETION); + } + +-- +2.35.3 + diff --git a/patches.suse/block-use-bdev_get_queue-in-blk-core.c.patch b/patches.suse/block-use-bdev_get_queue-in-blk-core.c.patch new file mode 100644 index 0000000..cf98ad9 --- /dev/null +++ b/patches.suse/block-use-bdev_get_queue-in-blk-core.c.patch @@ -0,0 +1,75 @@ +From: Pavel Begunkov +Date: Thu, 14 Oct 2021 15:03:29 +0100 +Subject: [PATCH] block: use bdev_get_queue() in blk-core.c +Git-commit: eab4e02733699cdf76fbe5e542d248c28454b3af +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Convert bdev->bd_disk->queue to bdev_get_queue(), it uses a cached +queue pointer and so is faster.
+ +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/efc41f880262517c8dc32f932f1b23112f21b255.1634219547.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-core.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/block/blk-core.c b/block/blk-core.c +index f008c38ae967..96ee996c0577 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -472,10 +472,11 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) + + static inline int bio_queue_enter(struct bio *bio) + { +- struct gendisk *disk = bio->bi_bdev->bd_disk; +- struct request_queue *q = disk->queue; ++ struct request_queue *q = bdev_get_queue(bio->bi_bdev); + + while (!blk_try_enter_queue(q, false)) { ++ struct gendisk *disk = bio->bi_bdev->bd_disk; ++ + if (bio->bi_opf & REQ_NOWAIT) { + if (test_bit(GD_DEAD, &disk->state)) + goto dead; +@@ -800,7 +801,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q, + static noinline_for_stack bool submit_bio_checks(struct bio *bio) + { + struct block_device *bdev = bio->bi_bdev; +- struct request_queue *q = bdev->bd_disk->queue; ++ struct request_queue *q = bdev_get_queue(bdev); + blk_status_t status = BLK_STS_IOERR; + struct blk_plug *plug; + +@@ -962,7 +963,7 @@ static void __submit_bio_noacct(struct bio *bio) + current->bio_list = bio_list_on_stack; + + do { +- struct request_queue *q = bio->bi_bdev->bd_disk->queue; ++ struct request_queue *q = bdev_get_queue(bio->bi_bdev); + struct bio_list lower, same; + + /* +@@ -980,7 +981,7 @@ static void __submit_bio_noacct(struct bio *bio) + bio_list_init(&lower); + bio_list_init(&same); + while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) +- if (q == bio->bi_bdev->bd_disk->queue) ++ if (q == bdev_get_queue(bio->bi_bdev)) + bio_list_add(&same, bio); + else + bio_list_add(&lower, bio); +@@ -1062,7 +1063,7 @@ void submit_bio(struct bio *bio) + + if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) + count = queue_logical_block_size( +- bio->bi_bdev->bd_disk->queue) >> 9; ++ bdev_get_queue(bio->bi_bdev)) >> 9; + else + count = bio_sectors(bio); + +-- +2.35.3 + diff --git a/patches.suse/block-use-bdev_nr_bytes-instead-of-open-coding-it-in.patch b/patches.suse/block-use-bdev_nr_bytes-instead-of-open-coding-it-in.patch new file mode 100644 index 0000000..129af5e --- /dev/null +++ b/patches.suse/block-use-bdev_nr_bytes-instead-of-open-coding-it-in.patch @@ -0,0 +1,37 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:24 +0200 +Subject: [PATCH] block: use bdev_nr_bytes instead of open coding it in + blkdev_fallocate +Git-commit: 2a93ad8fcb377b9d6e05947de161e146f5be4de9 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Reviewed-by: Jan Kara +Reviewed-by: Chaitanya Kulkarni +Link: https://lore.kernel.org/r/20211018101130.1838532-25-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/fops.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/fops.c b/block/fops.c +index 2c43e493e37c..2ae8a7bd2b84 100644 +--- a/block/fops.c ++++ b/block/fops.c +@@ -535,7 +535,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, + return -EOPNOTSUPP; + + /* Don't go off the end of the device. 
*/ +- isize = i_size_read(bdev->bd_inode); ++ isize = bdev_nr_bytes(bdev); + if (start >= isize) + return -EINVAL; + if (end >= isize) { +-- +2.35.3 + diff --git a/patches.suse/block-use-flags-instead-of-bit-fields-for-blkdev_dio.patch b/patches.suse/block-use-flags-instead-of-bit-fields-for-blkdev_dio.patch new file mode 100644 index 0000000..f31c464 --- /dev/null +++ b/patches.suse/block-use-flags-instead-of-bit-fields-for-blkdev_dio.patch @@ -0,0 +1,131 @@ +From: Jens Axboe +Date: Thu, 14 Oct 2021 11:17:43 -0600 +Subject: [PATCH] block: use flags instead of bit fields for blkdev_dio +Git-commit: 09ce8744253a038eb658c14f9dc3b77fa021fc9f +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +This generates a lot better code for me, and bumps performance from +7650K IOPS to 7750K IOPS. Looking at profiles for the run and running +perf diff, it confirms that we're now spending a lot less time there: + + 6.38% -2.80% [kernel.vmlinux] [k] blkdev_direct_IO + +Taking it from the 2nd most cycle consumer to only the 9th most at +3.35% of the CPU time. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/fops.c | 34 ++++++++++++++++++++-------------- + 1 file changed, 20 insertions(+), 14 deletions(-) + +diff --git a/block/fops.c b/block/fops.c +index 551b71af6d90..1d4f862950bb 100644 +--- a/block/fops.c ++++ b/block/fops.c +@@ -123,6 +123,12 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, + return ret; + } + ++enum { ++ DIO_MULTI_BIO = 1, ++ DIO_SHOULD_DIRTY = 2, ++ DIO_IS_SYNC = 4, ++}; ++ + struct blkdev_dio { + union { + struct kiocb *iocb; +@@ -130,9 +136,7 @@ struct blkdev_dio { + }; + size_t size; + atomic_t ref; +- bool multi_bio : 1; +- bool should_dirty : 1; +- bool is_sync : 1; ++ unsigned int flags; + struct bio bio; + }; + +@@ -141,13 +145,13 @@ static struct bio_set blkdev_dio_pool; + static void blkdev_bio_end_io(struct bio *bio) + { + struct blkdev_dio *dio = bio->bi_private; +- bool should_dirty = dio->should_dirty; ++ bool should_dirty = dio->flags & DIO_SHOULD_DIRTY; + + if (bio->bi_status && !dio->bio.bi_status) + dio->bio.bi_status = bio->bi_status; + +- if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) { +- if (!dio->is_sync) { ++ if (!(dio->flags & DIO_MULTI_BIO) || atomic_dec_and_test(&dio->ref)) { ++ if (!(dio->flags & DIO_IS_SYNC)) { + struct kiocb *iocb = dio->iocb; + ssize_t ret; + +@@ -161,7 +165,7 @@ static void blkdev_bio_end_io(struct bio *bio) + } + + dio->iocb->ki_complete(iocb, ret, 0); +- if (dio->multi_bio) ++ if (dio->flags & DIO_MULTI_BIO) + bio_put(&dio->bio); + } else { + struct task_struct *waiter = dio->waiter; +@@ -198,17 +202,19 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool); + + dio = container_of(bio, struct blkdev_dio, bio); +- dio->is_sync = is_sync = is_sync_kiocb(iocb); +- if (dio->is_sync) { ++ is_sync = is_sync_kiocb(iocb); ++ if (is_sync) { ++ dio->flags = DIO_IS_SYNC; + dio->waiter = current; + bio_get(bio); + } else { ++ dio->flags = 0; + dio->iocb = iocb; + } + + dio->size = 0; +- dio->multi_bio = false; +- dio->should_dirty = is_read && iter_is_iovec(iter); ++ if (is_read && iter_is_iovec(iter)) ++ dio->flags |= DIO_SHOULD_DIRTY; + + /* + * Don't plug for HIPRI/polled IO, as those should go straight +@@ -234,7 +240,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + + if (is_read) { + bio->bi_opf = REQ_OP_READ; +- if (dio->should_dirty) ++ if
(dio->flags & DIO_SHOULD_DIRTY) + bio_set_pages_dirty(bio); + } else { + bio->bi_opf = dio_bio_write_op(iocb); +@@ -255,7 +261,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + WRITE_ONCE(iocb->private, bio); + break; + } +- if (!dio->multi_bio) { ++ if (!(dio->flags & DIO_MULTI_BIO)) { + /* + * AIO needs an extra reference to ensure the dio + * structure which is embedded into the first bio + */ + if (!is_sync) + bio_get(bio); +- dio->multi_bio = true; ++ dio->flags |= DIO_MULTI_BIO; + atomic_set(&dio->ref, 2); + do_poll = false; + } else { +-- +2.35.3 + diff --git a/patches.suse/bpf-Add-SO_RCVBUF-SO_SNDBUF-in-_bpf_getsockopt.patch b/patches.suse/bpf-Add-SO_RCVBUF-SO_SNDBUF-in-_bpf_getsockopt.patch new file mode 100644 index 0000000..08a8694 --- /dev/null +++ b/patches.suse/bpf-Add-SO_RCVBUF-SO_SNDBUF-in-_bpf_getsockopt.patch @@ -0,0 +1,32 @@ +From: Kuniyuki Iwashima +Date: Tue, 4 Jan 2022 10:31:49 +0900 +Subject: bpf: Add SO_RCVBUF/SO_SNDBUF in _bpf_getsockopt(). +Patch-mainline: v5.17-rc1 +Git-commit: 28479934f26bcf9ddeb94125e05ddc5c4312b1f3 +References: jsc#PED-1368 + +This patch exposes SO_RCVBUF/SO_SNDBUF through bpf_getsockopt(). + +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220104013153.97906-3-kuniyu@amazon.co.jp +Acked-by: Shung-Hsi Yu +--- + net/core/filter.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -4986,6 +4986,12 @@ static int _bpf_getsockopt(struct sock * + goto err_clear; + + switch (optname) { ++ case SO_RCVBUF: ++ *((int *)optval) = sk->sk_rcvbuf; ++ break; ++ case SO_SNDBUF: ++ *((int *)optval) = sk->sk_sndbuf; ++ break; + case SO_MARK: + *((int *)optval) = sk->sk_mark; + break; diff --git a/patches.suse/bpf-Add-bpf_core_add_cands-and-wire-it-into-bpf_core.patch b/patches.suse/bpf-Add-bpf_core_add_cands-and-wire-it-into-bpf_core.patch new file mode 100644 index 0000000..37cdd3c --- /dev/null +++ b/patches.suse/bpf-Add-bpf_core_add_cands-and-wire-it-into-bpf_core.patch @@ -0,0 +1,424 @@ +From: Alexei Starovoitov +Date: Wed, 1 Dec 2021 10:10:31 -0800 +Subject: bpf: Add bpf_core_add_cands() and wire it into + bpf_core_apply_relo_insn(). +Patch-mainline: v5.17-rc1 +Git-commit: 1e89106da25390826608ad6ac0edfb7c9952eff3 +References: jsc#PED-1368 + +Given a BPF program's BTF root type name, perform the following steps: +. search in vmlinux candidate cache. +. if (present in cache and candidate list >= 1) return candidate list. +. do a linear search through kernel BTFs for possible candidates. +. regardless of number of candidates found populate vmlinux cache. +. if (candidate list >= 1) return candidate list. +. search in module candidate cache. +. if (present in cache) return candidate list (even if list is empty). +. do a linear search through BTFs of all kernel modules + collecting candidates from all of them. +. regardless of number of candidates found populate module cache. +. return candidate list. +Then wire the result into bpf_core_apply_relo_insn(). + +When a BPF program is trying to CO-RE relocate a type +that doesn't exist in either vmlinux BTF or in modules BTFs, +these steps will perform 2 cache lookups when the cache is hit. + +Note that the cache doesn't prevent abuse by a program that might +have lots of relocations that cannot be resolved. Hence cond_resched().
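Condensed into C-like pseudocode, the lookup order above is roughly the following (editorial sketch only, with locking, allocation and error handling omitted; for_each_module_btf() is shorthand for the idr walk, and the authoritative flow is bpf_core_find_cands() in the diff below):

	cc = check_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
	if (!cc) {
		/* linear search through vmlinux BTF, then cache the result */
		cands = bpf_core_add_cands(cands, vmlinux_btf, 1);
		cc = populate_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
	}
	if (cc->cnt)
		return cc;	/* any vmlinux candidate wins over modules */
	cc = check_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE);
	if (cc)
		return cc;	/* a module cache hit counts even when the list is empty */
	/* linear search through every module BTF, collecting candidates */
	for_each_module_btf(mod_btf)
		cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(vmlinux_btf));
	return populate_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE);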
+ +CO-RE in the kernel requires CAP_BPF, since BTF loading requires it. + +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-9-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/btf.c | 346 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 345 insertions(+), 1 deletion(-) + +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include "../tools/lib/bpf/relo_core.h" + + /* BTF (BPF Type Format) is the meta data format which describes + * the data types of BPF program/map. Hence, it basically focus +@@ -6169,6 +6170,8 @@ btf_module_read(struct file *file, struc + return len; + } + ++static void purge_cand_cache(struct btf *btf); ++ + static int btf_module_notify(struct notifier_block *nb, unsigned long op, + void *module) + { +@@ -6203,6 +6206,7 @@ static int btf_module_notify(struct noti + goto out; + } + ++ purge_cand_cache(NULL); + mutex_lock(&btf_module_mutex); + btf_mod->module = module; + btf_mod->btf = btf; +@@ -6245,6 +6249,7 @@ static int btf_module_notify(struct noti + list_del(&btf_mod->list); + if (btf_mod->sysfs_attr) + sysfs_remove_bin_file(btf_kobj, btf_mod->sysfs_attr); ++ purge_cand_cache(btf_mod->btf); + btf_put(btf_mod->btf); + kfree(btf_mod->sysfs_attr); + kfree(btf_mod); +@@ -6433,8 +6438,347 @@ size_t bpf_core_essential_name_len(const + return n; + } + ++struct bpf_cand_cache { ++ const char *name; ++ u32 name_len; ++ u16 kind; ++ u16 cnt; ++ struct { ++ const struct btf *btf; ++ u32 id; ++ } cands[]; ++}; ++ ++static void bpf_free_cands(struct bpf_cand_cache *cands) ++{ ++ if (!cands->cnt) ++ /* empty candidate array was allocated on stack */ ++ return; ++ kfree(cands); ++} ++ ++static void bpf_free_cands_from_cache(struct bpf_cand_cache *cands) ++{ ++ kfree(cands->name); ++ kfree(cands); ++} ++ ++#define VMLINUX_CAND_CACHE_SIZE 31 ++static struct bpf_cand_cache *vmlinux_cand_cache[VMLINUX_CAND_CACHE_SIZE]; ++ ++#define MODULE_CAND_CACHE_SIZE 31 ++static struct bpf_cand_cache *module_cand_cache[MODULE_CAND_CACHE_SIZE]; ++ ++static DEFINE_MUTEX(cand_cache_mutex); ++ ++static void __print_cand_cache(struct bpf_verifier_log *log, ++ struct bpf_cand_cache **cache, ++ int cache_size) ++{ ++ struct bpf_cand_cache *cc; ++ int i, j; ++ ++ for (i = 0; i < cache_size; i++) { ++ cc = cache[i]; ++ if (!cc) ++ continue; ++ bpf_log(log, "[%d]%s(", i, cc->name); ++ for (j = 0; j < cc->cnt; j++) { ++ bpf_log(log, "%d", cc->cands[j].id); ++ if (j < cc->cnt - 1) ++ bpf_log(log, " "); ++ } ++ bpf_log(log, "), "); ++ } ++} ++ ++static void print_cand_cache(struct bpf_verifier_log *log) ++{ ++ mutex_lock(&cand_cache_mutex); ++ bpf_log(log, "vmlinux_cand_cache:"); ++ __print_cand_cache(log, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); ++ bpf_log(log, "\nmodule_cand_cache:"); ++ __print_cand_cache(log, module_cand_cache, MODULE_CAND_CACHE_SIZE); ++ bpf_log(log, "\n"); ++ mutex_unlock(&cand_cache_mutex); ++} ++ ++static u32 hash_cands(struct bpf_cand_cache *cands) ++{ ++ return jhash(cands->name, cands->name_len, 0); ++} ++ ++static struct bpf_cand_cache *check_cand_cache(struct bpf_cand_cache *cands, ++ struct bpf_cand_cache **cache, ++ int cache_size) ++{ ++ struct bpf_cand_cache *cc = cache[hash_cands(cands) % cache_size]; ++ ++ if (cc && cc->name_len == cands->name_len && ++ !strncmp(cc->name, cands->name, cands->name_len)) ++ return cc; ++ return NULL; ++} ++ ++static size_t sizeof_cands(int cnt) 
++{ ++ return offsetof(struct bpf_cand_cache, cands[cnt]); ++} ++ ++static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands, ++ struct bpf_cand_cache **cache, ++ int cache_size) ++{ ++ struct bpf_cand_cache **cc = &cache[hash_cands(cands) % cache_size], *new_cands; ++ ++ if (*cc) { ++ bpf_free_cands_from_cache(*cc); ++ *cc = NULL; ++ } ++ new_cands = kmalloc(sizeof_cands(cands->cnt), GFP_KERNEL); ++ if (!new_cands) { ++ bpf_free_cands(cands); ++ return ERR_PTR(-ENOMEM); ++ } ++ memcpy(new_cands, cands, sizeof_cands(cands->cnt)); ++ /* strdup the name, since it will stay in cache. ++ * the cands->name points to strings in prog's BTF and the prog can be unloaded. ++ */ ++ new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL); ++ bpf_free_cands(cands); ++ if (!new_cands->name) { ++ kfree(new_cands); ++ return ERR_PTR(-ENOMEM); ++ } ++ *cc = new_cands; ++ return new_cands; ++} ++ ++static void __purge_cand_cache(struct btf *btf, struct bpf_cand_cache **cache, ++ int cache_size) ++{ ++ struct bpf_cand_cache *cc; ++ int i, j; ++ ++ for (i = 0; i < cache_size; i++) { ++ cc = cache[i]; ++ if (!cc) ++ continue; ++ if (!btf) { ++ /* when new module is loaded purge all of module_cand_cache, ++ * since new module might have candidates with the name ++ * that matches cached cands. ++ */ ++ bpf_free_cands_from_cache(cc); ++ cache[i] = NULL; ++ continue; ++ } ++ /* when module is unloaded purge cache entries ++ * that match module's btf ++ */ ++ for (j = 0; j < cc->cnt; j++) ++ if (cc->cands[j].btf == btf) { ++ bpf_free_cands_from_cache(cc); ++ cache[i] = NULL; ++ break; ++ } ++ } ++ ++} ++ ++static void purge_cand_cache(struct btf *btf) ++{ ++ mutex_lock(&cand_cache_mutex); ++ __purge_cand_cache(btf, module_cand_cache, MODULE_CAND_CACHE_SIZE); ++ mutex_unlock(&cand_cache_mutex); ++} ++ ++static struct bpf_cand_cache * ++bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf, ++ int targ_start_id) ++{ ++ struct bpf_cand_cache *new_cands; ++ const struct btf_type *t; ++ const char *targ_name; ++ size_t targ_essent_len; ++ int n, i; ++ ++ n = btf_nr_types(targ_btf); ++ for (i = targ_start_id; i < n; i++) { ++ t = btf_type_by_id(targ_btf, i); ++ if (btf_kind(t) != cands->kind) ++ continue; ++ ++ targ_name = btf_name_by_offset(targ_btf, t->name_off); ++ if (!targ_name) ++ continue; ++ ++ /* the resched point is before strncmp to make sure that search ++ * for non-existing name will have a chance to schedule(). 
++ */ ++ cond_resched(); ++ ++ if (strncmp(cands->name, targ_name, cands->name_len) != 0) ++ continue; ++ ++ targ_essent_len = bpf_core_essential_name_len(targ_name); ++ if (targ_essent_len != cands->name_len) ++ continue; ++ ++ /* most of the time there is only one candidate for a given kind+name pair */ ++ new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL); ++ if (!new_cands) { ++ bpf_free_cands(cands); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ memcpy(new_cands, cands, sizeof_cands(cands->cnt)); ++ bpf_free_cands(cands); ++ cands = new_cands; ++ cands->cands[cands->cnt].btf = targ_btf; ++ cands->cands[cands->cnt].id = i; ++ cands->cnt++; ++ } ++ return cands; ++} ++ ++static struct bpf_cand_cache * ++bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id) ++{ ++ struct bpf_cand_cache *cands, *cc, local_cand = {}; ++ const struct btf *local_btf = ctx->btf; ++ const struct btf_type *local_type; ++ const struct btf *main_btf; ++ size_t local_essent_len; ++ struct btf *mod_btf; ++ const char *name; ++ int id; ++ ++ main_btf = bpf_get_btf_vmlinux(); ++ if (IS_ERR(main_btf)) ++ return (void *)main_btf; ++ ++ local_type = btf_type_by_id(local_btf, local_type_id); ++ if (!local_type) ++ return ERR_PTR(-EINVAL); ++ ++ name = btf_name_by_offset(local_btf, local_type->name_off); ++ if (str_is_empty(name)) ++ return ERR_PTR(-EINVAL); ++ local_essent_len = bpf_core_essential_name_len(name); ++ ++ cands = &local_cand; ++ cands->name = name; ++ cands->kind = btf_kind(local_type); ++ cands->name_len = local_essent_len; ++ ++ cc = check_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); ++ /* cands is a pointer to stack here */ ++ if (cc) { ++ if (cc->cnt) ++ return cc; ++ goto check_modules; ++ } ++ ++ /* Attempt to find target candidates in vmlinux BTF first */ ++ cands = bpf_core_add_cands(cands, main_btf, 1); ++ if (IS_ERR(cands)) ++ return cands; ++ ++ /* cands is a pointer to kmalloced memory here if cands->cnt > 0 */ ++ ++ /* populate cache even when cands->cnt == 0 */ ++ cc = populate_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); ++ if (IS_ERR(cc)) ++ return cc; ++ ++ /* if vmlinux BTF has any candidate, don't go for module BTFs */ ++ if (cc->cnt) ++ return cc; ++ ++check_modules: ++ /* cands is a pointer to stack here and cands->cnt == 0 */ ++ cc = check_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE); ++ if (cc) ++ /* if cache has it return it even if cc->cnt == 0 */ ++ return cc; ++ ++ /* If candidate is not found in vmlinux's BTF then search in module's BTFs */ ++ spin_lock_bh(&btf_idr_lock); ++ idr_for_each_entry(&btf_idr, mod_btf, id) { ++ if (!btf_is_module(mod_btf)) ++ continue; ++ /* linear search could be slow hence unlock/lock ++ * the IDR to avoiding holding it for too long ++ */ ++ btf_get(mod_btf); ++ spin_unlock_bh(&btf_idr_lock); ++ cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf)); ++ if (IS_ERR(cands)) { ++ btf_put(mod_btf); ++ return cands; ++ } ++ spin_lock_bh(&btf_idr_lock); ++ btf_put(mod_btf); ++ } ++ spin_unlock_bh(&btf_idr_lock); ++ /* cands is a pointer to kmalloced memory here if cands->cnt > 0 ++ * or pointer to stack if cands->cnd == 0. ++ * Copy it into the cache even when cands->cnt == 0 and ++ * return the result. 
++ */ ++ return populate_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE); ++} ++ + int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, + int relo_idx, void *insn) + { +- return -EOPNOTSUPP; ++ bool need_cands = relo->kind != BPF_CORE_TYPE_ID_LOCAL; ++ struct bpf_core_cand_list cands = {}; ++ int err; ++ ++ if (need_cands) { ++ struct bpf_cand_cache *cc; ++ int i; ++ ++ mutex_lock(&cand_cache_mutex); ++ cc = bpf_core_find_cands(ctx, relo->type_id); ++ if (IS_ERR(cc)) { ++ bpf_log(ctx->log, "target candidate search failed for %d\n", ++ relo->type_id); ++ err = PTR_ERR(cc); ++ goto out; ++ } ++ if (cc->cnt) { ++ cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL); ++ if (!cands.cands) { ++ err = -ENOMEM; ++ goto out; ++ } ++ } ++ for (i = 0; i < cc->cnt; i++) { ++ bpf_log(ctx->log, ++ "CO-RE relocating %s %s: found target candidate [%d]\n", ++ btf_kind_str[cc->kind], cc->name, cc->cands[i].id); ++ cands.cands[i].btf = cc->cands[i].btf; ++ cands.cands[i].id = cc->cands[i].id; ++ } ++ cands.len = cc->cnt; ++ /* cand_cache_mutex needs to span the cache lookup and ++ * copy of btf pointer into bpf_core_cand_list, ++ * since module can be unloaded while bpf_core_apply_relo_insn ++ * is working with module's btf. ++ */ ++ } ++ ++ err = bpf_core_apply_relo_insn((void *)ctx->log, insn, relo->insn_off / 8, ++ relo, relo_idx, ctx->btf, &cands); ++out: ++ if (need_cands) { ++ kfree(cands.cands); ++ mutex_unlock(&cand_cache_mutex); ++ if (ctx->log->level & BPF_LOG_LEVEL2) ++ print_cand_cache(ctx->log); ++ } ++ return err; + } diff --git a/patches.suse/bpf-Add-bpf_loop-helper.patch b/patches.suse/bpf-Add-bpf_loop-helper.patch new file mode 100644 index 0000000..f7002a2 --- /dev/null +++ b/patches.suse/bpf-Add-bpf_loop-helper.patch @@ -0,0 +1,309 @@ +From: Joanne Koong +Date: Mon, 29 Nov 2021 19:06:19 -0800 +Subject: bpf: Add bpf_loop helper +Patch-mainline: v5.17-rc1 +Git-commit: e6f2dd0f80674e9d5960337b3e9c2a242441b326 +References: jsc#PED-1368 + +This patch adds the kernel-side and API changes for a new helper +function, bpf_loop: + +long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, +u64 flags); + +where long (*callback_fn)(u32 index, void *ctx); + +bpf_loop invokes the "callback_fn" **nr_loops** times or until the +callback_fn returns 1. The callback_fn can only return 0 or 1, and +this is enforced by the verifier. The callback_fn index is zero-indexed. + +A few things to please note: +~ The "u64 flags" parameter is currently unused but is included in +case a future use case for it arises. +~ In the kernel-side implementation of bpf_loop (kernel/bpf/bpf_iter.c), +bpf_callback_t is used as the callback function cast. +~ A program can have nested bpf_loop calls but the program must +still adhere to the verifier constraint of its stack depth (the stack depth +cannot exceed MAX_BPF_STACK)) +~ Recursive callback_fns do not pass the verifier, due to the call stack +for these being too deep. 
+~ The next patch will include the tests and benchmark + +Signed-off-by: Joanne Koong +Signed-off-by: Alexei Starovoitov +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211130030622.4131246-2-joannekoong@fb.com +Acked-by: Shung-Hsi Yu +--- + include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 25 +++++++++++ + kernel/bpf/bpf_iter.c | 35 ++++++++++++++++ + kernel/bpf/helpers.c | 2 + kernel/bpf/verifier.c | 88 +++++++++++++++++++++++++---------------- + tools/include/uapi/linux/bpf.h | 25 +++++++++++ + 6 files changed, 142 insertions(+), 34 deletions(-) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -2155,6 +2155,7 @@ extern const struct bpf_func_proto bpf_s + extern const struct bpf_func_proto bpf_sk_getsockopt_proto; + extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; + extern const struct bpf_func_proto bpf_find_vma_proto; ++extern const struct bpf_func_proto bpf_loop_proto; + + const struct bpf_func_proto *tracing_prog_func_proto( + enum bpf_func_id func_id, const struct bpf_prog *prog); +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -4957,6 +4957,30 @@ union bpf_attr { + * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. + * **-EBUSY** if failed to try lock mmap_lock. + * **-EINVAL** for invalid **flags**. ++ * ++ * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags) ++ * Description ++ * For **nr_loops**, call **callback_fn** function ++ * with **callback_ctx** as the context parameter. ++ * The **callback_fn** should be a static function and ++ * the **callback_ctx** should be a pointer to the stack. ++ * The **flags** is used to control certain aspects of the helper. ++ * Currently, the **flags** must be 0. Currently, nr_loops is ++ * limited to 1 << 23 (~8 million) loops. ++ * ++ * long (\*callback_fn)(u32 index, void \*ctx); ++ * ++ * where **index** is the current index in the loop. The index ++ * is zero-indexed. ++ * ++ * If **callback_fn** returns 0, the helper will continue to the next ++ * loop. If return value is 1, the helper will skip the rest of ++ * the loops and return. Other return values are not used now, ++ * and will be rejected by the verifier. ++ * ++ * Return ++ * The number of loops performed, **-EINVAL** for invalid **flags**, ++ * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. 
+ */ + #define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ +@@ -5140,6 +5164,7 @@ union bpf_attr { + FN(skc_to_unix_sock), \ + FN(kallsyms_lookup_name), \ + FN(find_vma), \ ++ FN(loop), \ + /* */ + + /* integer value in 'imm' field of BPF_CALL instruction selects which helper +--- a/kernel/bpf/bpf_iter.c ++++ b/kernel/bpf/bpf_iter.c +@@ -714,3 +714,38 @@ const struct bpf_func_proto bpf_for_each + .arg3_type = ARG_PTR_TO_STACK_OR_NULL, + .arg4_type = ARG_ANYTHING, + }; ++ ++/* maximum number of loops */ ++#define MAX_LOOPS BIT(23) ++ ++BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx, ++ u64, flags) ++{ ++ bpf_callback_t callback = (bpf_callback_t)callback_fn; ++ u64 ret; ++ u32 i; ++ ++ if (flags) ++ return -EINVAL; ++ if (nr_loops > MAX_LOOPS) ++ return -E2BIG; ++ ++ for (i = 0; i < nr_loops; i++) { ++ ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0); ++ /* return value: 0 - continue, 1 - stop and return */ ++ if (ret) ++ return i + 1; ++ } ++ ++ return i; ++} ++ ++const struct bpf_func_proto bpf_loop_proto = { ++ .func = bpf_loop, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_ANYTHING, ++ .arg2_type = ARG_PTR_TO_FUNC, ++ .arg3_type = ARG_PTR_TO_STACK_OR_NULL, ++ .arg4_type = ARG_ANYTHING, ++}; +--- a/kernel/bpf/helpers.c ++++ b/kernel/bpf/helpers.c +@@ -1376,6 +1376,8 @@ bpf_base_func_proto(enum bpf_func_id fun + return &bpf_ringbuf_query_proto; + case BPF_FUNC_for_each_map_elem: + return &bpf_for_each_map_elem_proto; ++ case BPF_FUNC_loop: ++ return &bpf_loop_proto; + default: + break; + } +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -6120,6 +6120,27 @@ static int set_map_elem_callback_state(s + return 0; + } + ++static int set_loop_callback_state(struct bpf_verifier_env *env, ++ struct bpf_func_state *caller, ++ struct bpf_func_state *callee, ++ int insn_idx) ++{ ++ /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, ++ * u64 flags); ++ * callback_fn(u32 index, void *callback_ctx); ++ */ ++ callee->regs[BPF_REG_1].type = SCALAR_VALUE; ++ callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; ++ ++ /* unused */ ++ __mark_reg_not_init(env, &callee->regs[BPF_REG_3]); ++ __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); ++ __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); ++ ++ callee->in_callback_fn = true; ++ return 0; ++} ++ + static int set_timer_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, +@@ -6493,13 +6514,7 @@ static int check_helper_call(struct bpf_ + return err; + } + +- if (func_id == BPF_FUNC_tail_call) { +- err = check_reference_leak(env); +- if (err) { +- verbose(env, "tail_call would lead to reference leak\n"); +- return err; +- } +- } else if (is_release_function(func_id)) { ++ if (is_release_function(func_id)) { + err = release_reference(env, meta.ref_obj_id); + if (err) { + verbose(env, "func %s#%d reference has not been acquired before\n", +@@ -6510,42 +6525,47 @@ static int check_helper_call(struct bpf_ + + regs = cur_regs(env); + +- /* check that flags argument in get_local_storage(map, flags) is 0, +- * this is required because get_local_storage() can't return an error. 
+- */ +- if (func_id == BPF_FUNC_get_local_storage && +- !register_is_null(®s[BPF_REG_2])) { +- verbose(env, "get_local_storage() doesn't support non-zero flags\n"); +- return -EINVAL; +- } +- +- if (func_id == BPF_FUNC_for_each_map_elem) { ++ switch (func_id) { ++ case BPF_FUNC_tail_call: ++ err = check_reference_leak(env); ++ if (err) { ++ verbose(env, "tail_call would lead to reference leak\n"); ++ return err; ++ } ++ break; ++ case BPF_FUNC_get_local_storage: ++ /* check that flags argument in get_local_storage(map, flags) is 0, ++ * this is required because get_local_storage() can't return an error. ++ */ ++ if (!register_is_null(®s[BPF_REG_2])) { ++ verbose(env, "get_local_storage() doesn't support non-zero flags\n"); ++ return -EINVAL; ++ } ++ break; ++ case BPF_FUNC_for_each_map_elem: + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_map_elem_callback_state); +- if (err < 0) +- return -EINVAL; +- } +- +- if (func_id == BPF_FUNC_timer_set_callback) { ++ break; ++ case BPF_FUNC_timer_set_callback: + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_timer_callback_state); +- if (err < 0) +- return -EINVAL; +- } +- +- if (func_id == BPF_FUNC_find_vma) { ++ break; ++ case BPF_FUNC_find_vma: + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_find_vma_callback_state); +- if (err < 0) +- return -EINVAL; +- } +- +- if (func_id == BPF_FUNC_snprintf) { ++ break; ++ case BPF_FUNC_snprintf: + err = check_bpf_snprintf_call(env, regs); +- if (err < 0) +- return err; ++ break; ++ case BPF_FUNC_loop: ++ err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, ++ set_loop_callback_state); ++ break; + } + ++ if (err) ++ return err; ++ + /* reset caller saved regs */ + for (i = 0; i < CALLER_SAVED_REGS; i++) { + mark_reg_not_init(env, regs, caller_saved[i]); +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -4960,6 +4960,30 @@ union bpf_attr { + * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. + * **-EBUSY** if failed to try lock mmap_lock. + * **-EINVAL** for invalid **flags**. ++ * ++ * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags) ++ * Description ++ * For **nr_loops**, call **callback_fn** function ++ * with **callback_ctx** as the context parameter. ++ * The **callback_fn** should be a static function and ++ * the **callback_ctx** should be a pointer to the stack. ++ * The **flags** is used to control certain aspects of the helper. ++ * Currently, the **flags** must be 0. Currently, nr_loops is ++ * limited to 1 << 23 (~8 million) loops. ++ * ++ * long (\*callback_fn)(u32 index, void \*ctx); ++ * ++ * where **index** is the current index in the loop. The index ++ * is zero-indexed. ++ * ++ * If **callback_fn** returns 0, the helper will continue to the next ++ * loop. If return value is 1, the helper will skip the rest of ++ * the loops and return. Other return values are not used now, ++ * and will be rejected by the verifier. ++ * ++ * Return ++ * The number of loops performed, **-EINVAL** for invalid **flags**, ++ * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. 
+ */ + #define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ +@@ -5143,6 +5167,7 @@ union bpf_attr { + FN(skc_to_unix_sock), \ + FN(kallsyms_lookup_name), \ + FN(find_vma), \ ++ FN(loop), \ + /* */ + + /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/patches.suse/bpf-Add-bpf_strncmp-helper.patch b/patches.suse/bpf-Add-bpf_strncmp-helper.patch new file mode 100644 index 0000000..73556d1 --- /dev/null +++ b/patches.suse/bpf-Add-bpf_strncmp-helper.patch @@ -0,0 +1,119 @@ +From: Hou Tao +Date: Fri, 10 Dec 2021 22:16:49 +0800 +Subject: bpf: Add bpf_strncmp helper +Patch-mainline: v5.17-rc1 +Git-commit: c5fb19937455095573a19ddcbff32e993ed10e35 +References: jsc#PED-1368 + +The helper compares two strings: one string is a null-terminated +read-only string, and another string has const max storage size +but doesn't need to be null-terminated. It can be used to compare +file name in tracing or LSM program. + +Signed-off-by: Hou Tao +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211210141652.877186-2-houtao1@huawei.com +Acked-by: Shung-Hsi Yu +--- + include/linux/bpf.h | 1 + + include/uapi/linux/bpf.h | 11 +++++++++++ + kernel/bpf/helpers.c | 16 ++++++++++++++++ + tools/include/uapi/linux/bpf.h | 11 +++++++++++ + 4 files changed, 39 insertions(+) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -2164,6 +2164,7 @@ extern const struct bpf_func_proto bpf_s + extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; + extern const struct bpf_func_proto bpf_find_vma_proto; + extern const struct bpf_func_proto bpf_loop_proto; ++extern const struct bpf_func_proto bpf_strncmp_proto; + + const struct bpf_func_proto *tracing_prog_func_proto( + enum bpf_func_id func_id, const struct bpf_prog *prog); +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -4983,6 +4983,16 @@ union bpf_attr { + * Return + * The number of loops performed, **-EINVAL** for invalid **flags**, + * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. ++ * ++ * long bpf_strncmp(const char *s1, u32 s1_sz, const char *s2) ++ * Description ++ * Do strncmp() between **s1** and **s2**. **s1** doesn't need ++ * to be null-terminated and **s1_sz** is the maximum storage ++ * size of **s1**. **s2** must be a read-only string. ++ * Return ++ * An integer less than, equal to, or greater than zero ++ * if the first **s1_sz** bytes of **s1** is found to be ++ * less than, to match, or be greater than **s2**. 
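A sketch of the use case named in the commit message, comparing a fixed-size, possibly unterminated buffer against a constant string; the attach point and names are illustrative only:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("kprobe/do_sys_openat2")
    int match_comm(void *ctx)
    {
            char comm[16];  /* s1: fixed storage, need not be null-terminated */

            bpf_get_current_comm(comm, sizeof(comm));
            /* s2 must be a constant, read-only, null-terminated string */
            if (!bpf_strncmp(comm, sizeof(comm), "sshd"))
                    bpf_printk("open() called by sshd");
            return 0;
    }

    char _license[] SEC("license") = "GPL";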
+ */ + #define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ +@@ -5167,6 +5177,7 @@ union bpf_attr { + FN(kallsyms_lookup_name), \ + FN(find_vma), \ + FN(loop), \ ++ FN(strncmp), \ + /* */ + + /* integer value in 'imm' field of BPF_CALL instruction selects which helper +--- a/kernel/bpf/helpers.c ++++ b/kernel/bpf/helpers.c +@@ -565,6 +565,20 @@ const struct bpf_func_proto bpf_strtoul_ + }; + #endif + ++BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2) ++{ ++ return strncmp(s1, s2, s1_sz); ++} ++ ++const struct bpf_func_proto bpf_strncmp_proto = { ++ .func = bpf_strncmp, ++ .gpl_only = false, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_CONST_SIZE, ++ .arg3_type = ARG_PTR_TO_CONST_STR, ++}; ++ + BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino, + struct bpf_pidns_info *, nsdata, u32, size) + { +@@ -1378,6 +1392,8 @@ bpf_base_func_proto(enum bpf_func_id fun + return &bpf_for_each_map_elem_proto; + case BPF_FUNC_loop: + return &bpf_loop_proto; ++ case BPF_FUNC_strncmp: ++ return &bpf_strncmp_proto; + default: + break; + } +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -4986,6 +4986,16 @@ union bpf_attr { + * Return + * The number of loops performed, **-EINVAL** for invalid **flags**, + * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. ++ * ++ * long bpf_strncmp(const char *s1, u32 s1_sz, const char *s2) ++ * Description ++ * Do strncmp() between **s1** and **s2**. **s1** doesn't need ++ * to be null-terminated and **s1_sz** is the maximum storage ++ * size of **s1**. **s2** must be a read-only string. ++ * Return ++ * An integer less than, equal to, or greater than zero ++ * if the first **s1_sz** bytes of **s1** is found to be ++ * less than, to match, or be greater than **s2**. 
+ */ + #define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ +@@ -5170,6 +5180,7 @@ union bpf_attr { + FN(kallsyms_lookup_name), \ + FN(find_vma), \ + FN(loop), \ ++ FN(strncmp), \ + /* */ + + /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/patches.suse/bpf-Add-check_func_arg_reg_off-function.patch b/patches.suse/bpf-Add-check_func_arg_reg_off-function.patch index 6994523..1a19d89 100644 --- a/patches.suse/bpf-Add-check_func_arg_reg_off-function.patch +++ b/patches.suse/bpf-Add-check_func_arg_reg_off-function.patch @@ -6,7 +6,6 @@ Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux Git-commit: 25b35dd28138f61f9a0fb8b76c0483761fd228bd References: git-fixes X-Info: dependency for "bpf: Fix PTR_TO_BTF_ID var_off check" 655efe5089f077485eec848272bd7e26b1a5a735 -X-Info: adjusted for context, missing MEM_ALLOC flag introduced in "bpf: Fix ringbuf memory type confusion when passing to helpers" a672b2e36a648afb04ad3bda93b6bda947a479a5 Lift the list of register types allowed for having fixed and variable offsets when passed as helper function arguments into a common helper, @@ -21,12 +20,12 @@ Link: https://lore.kernel.org/bpf/20220304224645.3677453-2-memxor@gmail.com Acked-by: Shung-Hsi Yu --- include/linux/bpf_verifier.h | 3 + - kernel/bpf/verifier.c | 67 +++++++++++++++++++++++++------------------ - 2 files changed, 43 insertions(+), 27 deletions(-) + kernel/bpf/verifier.c | 69 +++++++++++++++++++++++++------------------ + 2 files changed, 44 insertions(+), 28 deletions(-) --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h -@@ -496,6 +496,9 @@ bpf_prog_offload_remove_insns(struct bpf +@@ -521,6 +521,9 @@ bpf_prog_offload_remove_insns(struct bpf int check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno); @@ -38,7 +37,7 @@ Acked-by: Shung-Hsi Yu --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c -@@ -4879,6 +4879,43 @@ found: +@@ -5264,6 +5264,44 @@ found: return 0; } @@ -58,6 +57,7 @@ Acked-by: Shung-Hsi Yu + case PTR_TO_MAP_VALUE: + case PTR_TO_MEM: + case PTR_TO_MEM | MEM_RDONLY: ++ case PTR_TO_MEM | MEM_ALLOC: + case PTR_TO_BUF: + case PTR_TO_BUF | MEM_RDONLY: + case PTR_TO_STACK: @@ -82,7 +82,7 @@ Acked-by: Shung-Hsi Yu static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta, const struct bpf_func_proto *fn) -@@ -4928,33 +4965,9 @@ static int check_func_arg(struct bpf_ver +@@ -5313,34 +5351,9 @@ static int check_func_arg(struct bpf_ver if (err) return err; @@ -95,6 +95,7 @@ Acked-by: Shung-Hsi Yu - case PTR_TO_MAP_VALUE: - case PTR_TO_MEM: - case PTR_TO_MEM | MEM_RDONLY: +- case PTR_TO_MEM | MEM_ALLOC: - case PTR_TO_BUF: - case PTR_TO_BUF | MEM_RDONLY: - case PTR_TO_STACK: diff --git a/patches.suse/bpf-Add-get_func_-arg-ret-arg_cnt-helpers.patch b/patches.suse/bpf-Add-get_func_-arg-ret-arg_cnt-helpers.patch new file mode 100644 index 0000000..8239af1 --- /dev/null +++ b/patches.suse/bpf-Add-get_func_-arg-ret-arg_cnt-helpers.patch @@ -0,0 +1,401 @@ +From: Jiri Olsa +Date: Wed, 8 Dec 2021 20:32:44 +0100 +Subject: bpf: Add get_func_[arg|ret|arg_cnt] helpers +Patch-mainline: v5.17-rc1 +Git-commit: f92c1e183604c20ce00eb889315fdaa8f2d9e509 +References: jsc#PED-1368 + +Adding following helpers for tracing programs: + +Get n-th argument of the traced function: + long bpf_get_func_arg(void *ctx, u32 n, u64 *value) + +Get return value of the traced function: + long bpf_get_func_ret(void *ctx, u64 *value) + +Get arguments count of the traced 
function: + long bpf_get_func_arg_cnt(void *ctx) + +The trampoline now stores number of arguments on ctx-8 +address, so it's easy to verify argument index and find +return value argument's position. + +Moving function ip address on the trampoline stack behind +the number of functions arguments, so it's now stored on +ctx-16 address if it's needed. + +All helpers above are inlined by verifier. + +Also bit unrelated small change - using newly added function +bpf_prog_has_trampoline in check_get_func_ip. + +Signed-off-by: Jiri Olsa +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211208193245.172141-5-jolsa@kernel.org +Acked-by: Shung-Hsi Yu +--- + arch/x86/net/bpf_jit_comp.c | 15 +++++++ + include/linux/bpf.h | 5 ++ + include/uapi/linux/bpf.h | 28 ++++++++++++++ + kernel/bpf/trampoline.c | 8 ++++ + kernel/bpf/verifier.c | 77 ++++++++++++++++++++++++++++++++++++++--- + kernel/trace/bpf_trace.c | 55 ++++++++++++++++++++++++++++- + tools/include/uapi/linux/bpf.h | 28 ++++++++++++++ + 7 files changed, 209 insertions(+), 7 deletions(-) + +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -1933,7 +1933,7 @@ int arch_prepare_bpf_trampoline(struct b + void *orig_call) + { + int ret, i, nr_args = m->nr_args; +- int regs_off, ip_off, stack_size = nr_args * 8; ++ int regs_off, ip_off, args_off, stack_size = nr_args * 8; + struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY]; + struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT]; + struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; +@@ -1960,6 +1960,8 @@ int arch_prepare_bpf_trampoline(struct b + * [ ... ] + * RBP - regs_off [ reg_arg1 ] program's ctx pointer + * ++ * RBP - args_off [ args count ] always ++ * + * RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag + */ + +@@ -1970,6 +1972,10 @@ int arch_prepare_bpf_trampoline(struct b + + regs_off = stack_size; + ++ /* args count */ ++ stack_size += 8; ++ args_off = stack_size; ++ + if (flags & BPF_TRAMP_F_IP_ARG) + stack_size += 8; /* room for IP address argument */ + +@@ -1988,6 +1994,13 @@ int arch_prepare_bpf_trampoline(struct b + EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ + EMIT1(0x53); /* push rbx */ + ++ /* Store number of arguments of the traced function: ++ * mov rax, nr_args ++ * mov QWORD PTR [rbp - args_off], rax ++ */ ++ emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_args); ++ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -args_off); ++ + if (flags & BPF_TRAMP_F_IP_ARG) { + /* Store IP address of the traced function: + * mov rax, QWORD PTR [rbp + 8] +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -778,6 +778,7 @@ void bpf_ksym_add(struct bpf_ksym *ksym) + void bpf_ksym_del(struct bpf_ksym *ksym); + int bpf_jit_charge_modmem(u32 pages); + void bpf_jit_uncharge_modmem(u32 pages); ++bool bpf_prog_has_trampoline(const struct bpf_prog *prog); + #else + static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, + struct bpf_trampoline *tr) +@@ -806,6 +807,10 @@ static inline bool is_bpf_image_address( + { + return false; + } ++static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog) ++{ ++ return false; ++} + #endif + + struct bpf_func_info_aux { +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -4993,6 +4993,31 @@ union bpf_attr { + * An integer less than, equal to, or greater than zero + * if the first **s1_sz** bytes of **s1** is found to be + * less than, to match, or be greater than **s2**. 
++ * ++ * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) ++ * Description ++ * Get **n**-th argument (zero based) of the traced function (for tracing programs) ++ * returned in **value**. ++ * ++ * Return ++ * 0 on success. ++ * **-EINVAL** if n >= arguments count of traced function. ++ * ++ * long bpf_get_func_ret(void *ctx, u64 *value) ++ * Description ++ * Get return value of the traced function (for tracing programs) ++ * in **value**. ++ * ++ * Return ++ * 0 on success. ++ * **-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN. ++ * ++ * long bpf_get_func_arg_cnt(void *ctx) ++ * Description ++ * Get number of arguments of the traced function (for tracing programs). ++ * ++ * Return ++ * The number of arguments of the traced function. + */ + #define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ +@@ -5178,6 +5203,9 @@ union bpf_attr { + FN(find_vma), \ + FN(loop), \ + FN(strncmp), \ ++ FN(get_func_arg), \ ++ FN(get_func_ret), \ ++ FN(get_func_arg_cnt), \ + /* */ + + /* integer value in 'imm' field of BPF_CALL instruction selects which helper +--- a/kernel/bpf/trampoline.c ++++ b/kernel/bpf/trampoline.c +@@ -27,6 +27,14 @@ static struct hlist_head trampoline_tabl + /* serializes access to trampoline_table */ + static DEFINE_MUTEX(trampoline_mutex); + ++bool bpf_prog_has_trampoline(const struct bpf_prog *prog) ++{ ++ enum bpf_attach_type eatype = prog->expected_attach_type; ++ ++ return eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT || ++ eatype == BPF_MODIFY_RETURN; ++} ++ + void *bpf_jit_alloc_exec_page(void) + { + void *image; +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -6414,13 +6414,11 @@ static int check_bpf_snprintf_call(struc + + static int check_get_func_ip(struct bpf_verifier_env *env) + { +- enum bpf_attach_type eatype = env->prog->expected_attach_type; + enum bpf_prog_type type = resolve_prog_type(env->prog); + int func_id = BPF_FUNC_get_func_ip; + + if (type == BPF_PROG_TYPE_TRACING) { +- if (eatype != BPF_TRACE_FENTRY && eatype != BPF_TRACE_FEXIT && +- eatype != BPF_MODIFY_RETURN) { ++ if (!bpf_prog_has_trampoline(env->prog)) { + verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n", + func_id_name(func_id), func_id); + return -ENOTSUPP; +@@ -13020,6 +13018,7 @@ static int fixup_kfunc_call(struct bpf_v + static int do_misc_fixups(struct bpf_verifier_env *env) + { + struct bpf_prog *prog = env->prog; ++ enum bpf_attach_type eatype = prog->expected_attach_type; + bool expect_blinding = bpf_jit_blinding_enabled(prog); + enum bpf_prog_type prog_type = resolve_prog_type(prog); + struct bpf_insn *insn = prog->insnsi; +@@ -13390,11 +13389,79 @@ patch_map_ops_generic: + continue; + } + ++ /* Implement bpf_get_func_arg inline. 
*/ ++ if (prog_type == BPF_PROG_TYPE_TRACING && ++ insn->imm == BPF_FUNC_get_func_arg) { ++ /* Load nr_args from ctx - 8 */ ++ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); ++ insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6); ++ insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3); ++ insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1); ++ insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0); ++ insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); ++ insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0); ++ insn_buf[7] = BPF_JMP_A(1); ++ insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL); ++ cnt = 9; ++ ++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ delta += cnt - 1; ++ env->prog = prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ continue; ++ } ++ ++ /* Implement bpf_get_func_ret inline. */ ++ if (prog_type == BPF_PROG_TYPE_TRACING && ++ insn->imm == BPF_FUNC_get_func_ret) { ++ if (eatype == BPF_TRACE_FEXIT || ++ eatype == BPF_MODIFY_RETURN) { ++ /* Load nr_args from ctx - 8 */ ++ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); ++ insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3); ++ insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1); ++ insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0); ++ insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0); ++ insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0); ++ cnt = 6; ++ } else { ++ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP); ++ cnt = 1; ++ } ++ ++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ delta += cnt - 1; ++ env->prog = prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ continue; ++ } ++ ++ /* Implement get_func_arg_cnt inline. */ ++ if (prog_type == BPF_PROG_TYPE_TRACING && ++ insn->imm == BPF_FUNC_get_func_arg_cnt) { ++ /* Load nr_args from ctx - 8 */ ++ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); ++ ++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ env->prog = prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ continue; ++ } ++ + /* Implement bpf_get_func_ip inline. */ + if (prog_type == BPF_PROG_TYPE_TRACING && + insn->imm == BPF_FUNC_get_func_ip) { +- /* Load IP address from ctx - 8 */ +- insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); ++ /* Load IP address from ctx - 16 */ ++ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16); + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1); + if (!new_prog) +--- a/kernel/trace/bpf_trace.c ++++ b/kernel/trace/bpf_trace.c +@@ -1012,7 +1012,7 @@ const struct bpf_func_proto bpf_snprintf + BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx) + { + /* This helper call is inlined by verifier. */ +- return ((u64 *)ctx)[-1]; ++ return ((u64 *)ctx)[-2]; + } + + static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = { +@@ -1091,6 +1091,53 @@ static const struct bpf_func_proto bpf_g + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + }; + ++BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value) ++{ ++ /* This helper call is inlined by verifier. 
*/ ++ u64 nr_args = ((u64 *)ctx)[-1]; ++ ++ if ((u64) n >= nr_args) ++ return -EINVAL; ++ *value = ((u64 *)ctx)[n]; ++ return 0; ++} ++ ++static const struct bpf_func_proto bpf_get_func_arg_proto = { ++ .func = get_func_arg, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_ANYTHING, ++ .arg3_type = ARG_PTR_TO_LONG, ++}; ++ ++BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value) ++{ ++ /* This helper call is inlined by verifier. */ ++ u64 nr_args = ((u64 *)ctx)[-1]; ++ ++ *value = ((u64 *)ctx)[nr_args]; ++ return 0; ++} ++ ++static const struct bpf_func_proto bpf_get_func_ret_proto = { ++ .func = get_func_ret, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++ .arg2_type = ARG_PTR_TO_LONG, ++}; ++ ++BPF_CALL_1(get_func_arg_cnt, void *, ctx) ++{ ++ /* This helper call is inlined by verifier. */ ++ return ((u64 *)ctx)[-1]; ++} ++ ++static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = { ++ .func = get_func_arg_cnt, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_CTX, ++}; ++ + static const struct bpf_func_proto * + bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) + { +@@ -1629,6 +1676,12 @@ tracing_prog_func_proto(enum bpf_func_id + NULL; + case BPF_FUNC_d_path: + return &bpf_d_path_proto; ++ case BPF_FUNC_get_func_arg: ++ return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_proto : NULL; ++ case BPF_FUNC_get_func_ret: ++ return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL; ++ case BPF_FUNC_get_func_arg_cnt: ++ return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL; + default: + fn = raw_tp_prog_func_proto(func_id, prog); + if (!fn && prog->expected_attach_type == BPF_TRACE_ITER) +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -4996,6 +4996,31 @@ union bpf_attr { + * An integer less than, equal to, or greater than zero + * if the first **s1_sz** bytes of **s1** is found to be + * less than, to match, or be greater than **s2**. ++ * ++ * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) ++ * Description ++ * Get **n**-th argument (zero based) of the traced function (for tracing programs) ++ * returned in **value**. ++ * ++ * Return ++ * 0 on success. ++ * **-EINVAL** if n >= arguments count of traced function. ++ * ++ * long bpf_get_func_ret(void *ctx, u64 *value) ++ * Description ++ * Get return value of the traced function (for tracing programs) ++ * in **value**. ++ * ++ * Return ++ * 0 on success. ++ * **-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN. ++ * ++ * long bpf_get_func_arg_cnt(void *ctx) ++ * Description ++ * Get number of arguments of the traced function (for tracing programs). ++ * ++ * Return ++ * The number of arguments of the traced function. 
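Taken together, a sketch of an fexit program using all three helpers; the attach target is the kernel's BPF test function, and BPF_PROG is the usual libbpf macro that supplies ctx:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    SEC("fexit/bpf_fentry_test1")
    int BPF_PROG(dump_args)
    {
            __u64 nr_args = bpf_get_func_arg_cnt(ctx);
            __u64 arg = 0, ret = 0;

            if (!bpf_get_func_arg(ctx, 0, &arg))    /* -EINVAL if n >= nr_args */
                    bpf_printk("arg0=%llu of %llu args", arg, nr_args);
            if (!bpf_get_func_ret(ctx, &ret))       /* fexit/fmod_ret only */
                    bpf_printk("ret=%llu", ret);
            return 0;
    }

    char _license[] SEC("license") = "GPL";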
+ */ + #define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ +@@ -5181,6 +5206,9 @@ union bpf_attr { + FN(find_vma), \ + FN(loop), \ + FN(strncmp), \ ++ FN(get_func_arg), \ ++ FN(get_func_ret), \ ++ FN(get_func_arg_cnt), \ + /* */ + + /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/patches.suse/bpf-Add-ingress_ifindex-to-bpf_sk_lookup.patch b/patches.suse/bpf-Add-ingress_ifindex-to-bpf_sk_lookup.patch new file mode 100644 index 0000000..d5bc2bb --- /dev/null +++ b/patches.suse/bpf-Add-ingress_ifindex-to-bpf_sk_lookup.patch @@ -0,0 +1,240 @@ +From: Mark Pashmfouroush +Date: Wed, 10 Nov 2021 11:10:15 +0000 +Subject: bpf: Add ingress_ifindex to bpf_sk_lookup +Patch-mainline: v5.17-rc1 +Git-commit: f89315650ba34ec6c91a8bded72796980bee2a4d +References: jsc#PED-1368 + +It may be helpful to have access to the ifindex during bpf socket +lookup. An example may be to scope certain socket lookup logic to +specific interfaces, i.e. an interface may be made exempt from custom +lookup code. + +Add the ifindex of the arriving connection to the bpf_sk_lookup API. + +Signed-off-by: Mark Pashmfouroush +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211110111016.5670-2-markpash@cloudflare.com +Acked-by: Shung-Hsi Yu +--- + include/linux/filter.h | 7 +++++-- + include/uapi/linux/bpf.h | 1 + + net/core/filter.c | 7 +++++++ + net/ipv4/inet_hashtables.c | 8 ++++---- + net/ipv4/udp.c | 8 ++++---- + net/ipv6/inet6_hashtables.c | 8 ++++---- + net/ipv6/udp.c | 8 ++++---- + tools/include/uapi/linux/bpf.h | 1 + + 8 files changed, 30 insertions(+), 18 deletions(-) + +--- a/include/linux/filter.h ++++ b/include/linux/filter.h +@@ -1371,6 +1371,7 @@ struct bpf_sk_lookup_kern { + const struct in6_addr *daddr; + } v6; + struct sock *selected_sk; ++ u32 ingress_ifindex; + bool no_reuseport; + }; + +@@ -1433,7 +1434,7 @@ extern struct static_key_false bpf_sk_lo + static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, + const __be32 saddr, const __be16 sport, + const __be32 daddr, const u16 dport, +- struct sock **psk) ++ const int ifindex, struct sock **psk) + { + struct bpf_prog_array *run_array; + struct sock *selected_sk = NULL; +@@ -1449,6 +1450,7 @@ static inline bool bpf_sk_lookup_run_v4( + .v4.daddr = daddr, + .sport = sport, + .dport = dport, ++ .ingress_ifindex = ifindex, + }; + u32 act; + +@@ -1471,7 +1473,7 @@ static inline bool bpf_sk_lookup_run_v6( + const __be16 sport, + const struct in6_addr *daddr, + const u16 dport, +- struct sock **psk) ++ const int ifindex, struct sock **psk) + { + struct bpf_prog_array *run_array; + struct sock *selected_sk = NULL; +@@ -1487,6 +1489,7 @@ static inline bool bpf_sk_lookup_run_v6( + .v6.daddr = daddr, + .sport = sport, + .dport = dport, ++ .ingress_ifindex = ifindex, + }; + u32 act; + +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -6316,6 +6316,7 @@ struct bpf_sk_lookup { + __u32 local_ip4; /* Network byte order */ + __u32 local_ip6[4]; /* Network byte order */ + __u32 local_port; /* Host byte order */ ++ __u32 ingress_ifindex; /* The arriving interface. Determined by inet_iif. 
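A sketch of the scoping use case from the commit message, exempting one interface from custom lookup logic; the ifindex value and program name are assumptions for the example:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    SEC("sk_lookup")
    int exempt_iface(struct bpf_sk_lookup *ctx)
    {
            if (ctx->ingress_ifindex == 2)  /* assume ifindex 2 is exempt */
                    return SK_PASS; /* no bpf_sk_assign(), regular lookup applies */

            /* ... custom socket selection for every other interface ... */
            return SK_PASS;
    }

    char _license[] SEC("license") = "GPL";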
*/ + }; + + /* +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -10560,6 +10560,7 @@ static bool sk_lookup_is_valid_access(in + case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): + case bpf_ctx_range(struct bpf_sk_lookup, remote_port): + case bpf_ctx_range(struct bpf_sk_lookup, local_port): ++ case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex): + bpf_ctx_record_field_size(info, sizeof(__u32)); + return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32)); + +@@ -10649,6 +10650,12 @@ static u32 sk_lookup_convert_ctx_access( + bpf_target_off(struct bpf_sk_lookup_kern, + dport, 2, target_size)); + break; ++ ++ case offsetof(struct bpf_sk_lookup, ingress_ifindex): ++ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, ++ bpf_target_off(struct bpf_sk_lookup_kern, ++ ingress_ifindex, 4, target_size)); ++ break; + } + + return insn - insn_buf; +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -307,7 +307,7 @@ static inline struct sock *inet_lookup_r + struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, + __be32 saddr, __be16 sport, +- __be32 daddr, u16 hnum) ++ __be32 daddr, u16 hnum, const int dif) + { + struct sock *sk, *reuse_sk; + bool no_reuseport; +@@ -315,8 +315,8 @@ static inline struct sock *inet_lookup_r + if (hashinfo != &tcp_hashinfo) + return NULL; /* only TCP is supported */ + +- no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, +- saddr, sport, daddr, hnum, &sk); ++ no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, saddr, sport, ++ daddr, hnum, dif, &sk); + if (no_reuseport || IS_ERR_OR_NULL(sk)) + return sk; + +@@ -340,7 +340,7 @@ struct sock *__inet_lookup_listener(stru + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + result = inet_lookup_run_bpf(net, hashinfo, skb, doff, +- saddr, sport, daddr, hnum); ++ saddr, sport, daddr, hnum, dif); + if (result) + goto done; + } +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -460,7 +460,7 @@ static struct sock *udp4_lookup_run_bpf( + struct udp_table *udptable, + struct sk_buff *skb, + __be32 saddr, __be16 sport, +- __be32 daddr, u16 hnum) ++ __be32 daddr, u16 hnum, const int dif) + { + struct sock *sk, *reuse_sk; + bool no_reuseport; +@@ -468,8 +468,8 @@ static struct sock *udp4_lookup_run_bpf( + if (udptable != &udp_table) + return NULL; /* only UDP is supported */ + +- no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, +- saddr, sport, daddr, hnum, &sk); ++ no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, saddr, sport, ++ daddr, hnum, dif, &sk); + if (no_reuseport || IS_ERR_OR_NULL(sk)) + return sk; + +@@ -505,7 +505,7 @@ struct sock *__udp4_lib_lookup(struct ne + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + sk = udp4_lookup_run_bpf(net, udptable, skb, +- saddr, sport, daddr, hnum); ++ saddr, sport, daddr, hnum, dif); + if (sk) { + result = sk; + goto done; +--- a/net/ipv6/inet6_hashtables.c ++++ b/net/ipv6/inet6_hashtables.c +@@ -165,7 +165,7 @@ static inline struct sock *inet6_lookup_ + const struct in6_addr *saddr, + const __be16 sport, + const struct in6_addr *daddr, +- const u16 hnum) ++ const u16 hnum, const int dif) + { + struct sock *sk, *reuse_sk; + bool no_reuseport; +@@ -173,8 +173,8 @@ static inline struct sock *inet6_lookup_ + if (hashinfo != &tcp_hashinfo) + return NULL; /* only TCP is supported */ + +- no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, +- saddr, sport, daddr, hnum, &sk); ++ no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, 
saddr, sport, ++ daddr, hnum, dif, &sk); + if (no_reuseport || IS_ERR_OR_NULL(sk)) + return sk; + +@@ -198,7 +198,7 @@ struct sock *inet6_lookup_listener(struc + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + result = inet6_lookup_run_bpf(net, hashinfo, skb, doff, +- saddr, sport, daddr, hnum); ++ saddr, sport, daddr, hnum, dif); + if (result) + goto done; + } +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -195,7 +195,7 @@ static inline struct sock *udp6_lookup_r + const struct in6_addr *saddr, + __be16 sport, + const struct in6_addr *daddr, +- u16 hnum) ++ u16 hnum, const int dif) + { + struct sock *sk, *reuse_sk; + bool no_reuseport; +@@ -203,8 +203,8 @@ static inline struct sock *udp6_lookup_r + if (udptable != &udp_table) + return NULL; /* only UDP is supported */ + +- no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, +- saddr, sport, daddr, hnum, &sk); ++ no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport, ++ daddr, hnum, dif, &sk); + if (no_reuseport || IS_ERR_OR_NULL(sk)) + return sk; + +@@ -240,7 +240,7 @@ struct sock *__udp6_lib_lookup(struct ne + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + sk = udp6_lookup_run_bpf(net, udptable, skb, +- saddr, sport, daddr, hnum); ++ saddr, sport, daddr, hnum, dif); + if (sk) { + result = sk; + goto done; +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -6319,6 +6319,7 @@ struct bpf_sk_lookup { + __u32 local_ip4; /* Network byte order */ + __u32 local_ip6[4]; /* Network byte order */ + __u32 local_port; /* Host byte order */ ++ __u32 ingress_ifindex; /* The arriving interface. Determined by inet_iif. */ + }; + + /* diff --git a/patches.suse/bpf-Add-missing-map_get_next_key-method-to-bloom-fil.patch b/patches.suse/bpf-Add-missing-map_get_next_key-method-to-bloom-fil.patch new file mode 100644 index 0000000..e872908 --- /dev/null +++ b/patches.suse/bpf-Add-missing-map_get_next_key-method-to-bloom-fil.patch @@ -0,0 +1,42 @@ +From: Haimin Zhang +Date: Wed, 29 Dec 2021 19:20:02 +0800 +Subject: bpf: Add missing map_get_next_key method to bloom filter map. +Patch-mainline: v5.17-rc1 +Git-commit: 3ccdcee28415c4226de05438b4d89eb5514edf73 +References: jsc#PED-1368 + +Without it, kernel crashes in map_get_next_key(). 
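With the stub below in place, a user-space iteration attempt fails cleanly instead of dereferencing a NULL method pointer; roughly, using libbpf (variable names illustrative):

    __u32 key = 0, next_key;
    int err;

    /* bloom filter maps cannot be iterated; this now fails with
     * EOPNOTSUPP rather than crashing in map_get_next_key() */
    err = bpf_map_get_next_key(map_fd, &key, &next_key);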
+
+Fixes: 9330986c0300 ("bpf: Add bloom filter map implementation")
+Reported-by: TCS Robot
+Signed-off-by: Haimin Zhang
+Signed-off-by: Alexei Starovoitov
+Acked-by: Joanne Koong
+Link: https://lore.kernel.org/bpf/1640776802-22421-1-git-send-email-tcs.kernel@gmail.com
+Acked-by: Shung-Hsi Yu
+---
+ kernel/bpf/bloom_filter.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/kernel/bpf/bloom_filter.c
++++ b/kernel/bpf/bloom_filter.c
+@@ -82,6 +82,11 @@ static int bloom_map_delete_elem(struct
+ 	return -EOPNOTSUPP;
+ }
+ 
++static int bloom_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
++{
++	return -EOPNOTSUPP;
++}
++
+ static struct bpf_map *bloom_map_alloc(union bpf_attr *attr)
+ {
+ 	u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits;
+@@ -192,6 +197,7 @@ const struct bpf_map_ops bloom_filter_ma
+ 	.map_meta_equal = bpf_map_meta_equal,
+ 	.map_alloc = bloom_map_alloc,
+ 	.map_free = bloom_map_free,
++	.map_get_next_key = bloom_map_get_next_key,
+ 	.map_push_elem = bloom_map_push_elem,
+ 	.map_peek_elem = bloom_map_peek_elem,
+ 	.map_pop_elem = bloom_map_pop_elem,
diff --git a/patches.suse/bpf-Add-schedule-points-in-batch-ops.patch b/patches.suse/bpf-Add-schedule-points-in-batch-ops.patch
new file mode 100644
index 0000000..e372257
--- /dev/null
+++ b/patches.suse/bpf-Add-schedule-points-in-batch-ops.patch
@@ -0,0 +1,61 @@
+From: Eric Dumazet
+Date: Thu, 17 Feb 2022 10:19:02 -0800
+Subject: bpf: Add schedule points in batch ops
+Patch-mainline: v5.17-rc6
+Git-commit: 75134f16e7dd0007aa474b281935c5f42e79f2c8
+References: jsc#PED-1368
+
+syzbot reported various soft lockups caused by bpf batch operations.
+
+ INFO: task kworker/1:1:27 blocked for more than 140 seconds.
+ INFO: task hung in rcu_barrier
+
+Nothing prevents batch ops from processing huge amounts of data;
+we need to add schedule points in them.
+
+Note that the maybe_wait_bpf_programs(map) calls from
+generic_map_delete_batch() can be factorized by moving
+the call after the loop.
+
+This will be done later in the -next tree once we get this fix merged,
+unless there is a strong opinion on doing this optimization sooner.
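The loops patched here back the BPF_MAP_*_BATCH syscall commands, reachable from user space e.g. via libbpf; a single call may process millions of elements, which is why each iteration now yields the CPU. A sketch, where keys and values are assumed to be caller-allocated arrays:

    __u32 count = 1 << 20;  /* one huge request used to hog a CPU */
    __u64 out_batch = 0;
    DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts);

    err = bpf_map_lookup_batch(map_fd, NULL, &out_batch,
                               keys, values, &count, &opts);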
+
+Fixes: aa2e93b8e58e ("bpf: Add generic support for update and delete batch ops")
+Fixes: cb4d03ab499d ("bpf: Add generic support for lookup batch op")
+Reported-by: syzbot
+Signed-off-by: Eric Dumazet
+Signed-off-by: Alexei Starovoitov
+Reviewed-by: Stanislav Fomichev
+Acked-by: Brian Vazquez
+Link: https://lore.kernel.org/bpf/20220217181902.808742-1-eric.dumazet@gmail.com
+Acked-by: Shung-Hsi Yu
+---
+ kernel/bpf/syscall.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -1355,6 +1355,7 @@ int generic_map_delete_batch(struct bpf_
+ 		maybe_wait_bpf_programs(map);
+ 		if (err)
+ 			break;
++		cond_resched();
+ 	}
+ 	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
+ 		err = -EFAULT;
+@@ -1412,6 +1413,7 @@ int generic_map_update_batch(struct bpf_
+
+ 		if (err)
+ 			break;
++		cond_resched();
+ 	}
+
+ 	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
+@@ -1509,6 +1511,7 @@ int generic_map_lookup_batch(struct bpf_
+ 		swap(prev_key, key);
+ 		retry = MAP_LOOKUP_RETRIES;
+ 		cp++;
++		cond_resched();
+ 	}
+
+ 	if (err == -EFAULT)
diff --git a/patches.suse/bpf-Allow-access-to-int-pointer-arguments-in-tracing.patch b/patches.suse/bpf-Allow-access-to-int-pointer-arguments-in-tracing.patch
new file mode 100644
index 0000000..6694729
--- /dev/null
+++ b/patches.suse/bpf-Allow-access-to-int-pointer-arguments-in-tracing.patch
@@ -0,0 +1,68 @@
+From: Jiri Olsa
+Date: Wed, 8 Dec 2021 20:32:41 +0100
+Subject: bpf: Allow access to int pointer arguments in tracing programs
+Patch-mainline: v5.17-rc1
+Git-commit: bb6728d756112596881a5fdf2040544031905840
+References: jsc#PED-1368
+
+Adding support to access arguments with int pointer arguments
+in tracing programs.
+
+Currently we allow tracing programs to access only pointers to
+string (char pointer), void pointers and pointers to structs.
+
+If we try to access an argument which is a pointer to int, the
+verifier will fail to load the program with:
+
+ R1 type=ctx expected=fp
+ ; int BPF_PROG(fmod_ret_test, int _a, __u64 _b, int _ret)
+ 0: (bf) r6 = r1
+ ; int BPF_PROG(fmod_ret_test, int _a, __u64 _b, int _ret)
+ 1: (79) r9 = *(u64 *)(r6 +8)
+ func 'bpf_modify_return_test' arg1 type INT is not a struct
+
+There is no harm in the program accessing an int pointer argument.
+We are already doing that for string pointers, which are pointers
+to a 1-byte integer type.
+
+Change the is_string_ptr() check into a generic integer check and
+rename it to is_int_ptr().
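With the check relaxed, a program like the one from the verifier log above loads; a sketch against bpf_modify_return_test(), the kernel's test function with an int pointer argument:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    SEC("fexit/bpf_modify_return_test")
    int BPF_PROG(fmod_ret_test, int _a, int *_b, int _ret)
    {
            int val = 0;

            /* loading _b from ctx was rejected before this patch; the
             * pointee is still read with a probe read */
            bpf_probe_read_kernel(&val, sizeof(val), _b);
            return 0;
    }

    char _license[] SEC("license") = "GPL";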
+
+Signed-off-by: Jiri Olsa
+Signed-off-by: Alexei Starovoitov
+Link: https://lore.kernel.org/bpf/20211208193245.172141-2-jolsa@kernel.org
+Acked-by: Shung-Hsi Yu
+---
+ kernel/bpf/btf.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/kernel/bpf/btf.c
++++ b/kernel/bpf/btf.c
+@@ -4826,7 +4826,7 @@ struct btf *bpf_prog_get_target_btf(cons
+ 	return prog->aux->attach_btf;
+ }
+
+-static bool is_string_ptr(struct btf *btf, const struct btf_type *t)
++static bool is_int_ptr(struct btf *btf, const struct btf_type *t)
+ {
+ 	/* t comes in already as a pointer */
+ 	t = btf_type_by_id(btf, t->type);
+@@ -4835,8 +4835,7 @@ static bool is_string_ptr(struct btf *bt
+ 	if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST)
+ 		t = btf_type_by_id(btf, t->type);
+
+-	/* char, signed char, unsigned char */
+-	return btf_type_is_int(t) && t->size == 1;
++	return btf_type_is_int(t);
+ }
+
+ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
+@@ -4957,7 +4956,7 @@ bool btf_ctx_access(int off, int size, e
+ 	 */
+ 	return true;
+
+-	if (is_string_ptr(btf, t))
++	if (is_int_ptr(btf, t))
+ 		return true;
+
+ 	/* this is a pointer to another type */
diff --git a/patches.suse/bpf-Allow-bpf_local_storage-to-be-used-by-sleepable-.patch b/patches.suse/bpf-Allow-bpf_local_storage-to-be-used-by-sleepable-.patch
new file mode 100644
index 0000000..c28e5fc
--- /dev/null
+++ b/patches.suse/bpf-Allow-bpf_local_storage-to-be-used-by-sleepable-.patch
@@ -0,0 +1,339 @@
+From: KP Singh
+Date: Fri, 24 Dec 2021 15:29:15 +0000
+Subject: bpf: Allow bpf_local_storage to be used by sleepable programs
+Patch-mainline: v5.17-rc1
+Git-commit: 0fe4b381a59ebc53522fce579b281a67a9e1bee6
+References: jsc#PED-1368
+
+Other maps like hashmaps are already available to sleepable programs.
+Sleepable BPF programs run under trace RCU. Allow task, sk and inode
+storage to be used from sleepable programs. This allows sleepable and
+non-sleepable programs to provide shareable annotations on kernel
+objects.
+
+Sleepable programs run under trace RCU, whereas non-sleepable programs
+run in a normal RCU critical section, i.e. __bpf_prog_enter{_sleepable}
+and __bpf_prog_exit{_sleepable} (rcu_read_lock or rcu_read_lock_trace).
+
+In order to make the local storage maps accessible to both sleepable
+and non-sleepable programs, one needs to call both
+call_rcu_tasks_trace and call_rcu to wait for both trace and classical
+RCU grace periods to expire before freeing memory.
+
+Paul's work on call_rcu_tasks_trace allows us to have per-CPU queueing
+for call_rcu_tasks_trace. This behaviour can be achieved by setting
+the rcupdate.rcu_task_enqueue_lim= boot parameter.
+
+In light of these new performance changes and to keep the local storage
+code simple, avoid adding a new flag for sleepable maps / local storage
+to select the RCU synchronization (trace / classical).
+
+Also, update the dereferencing of the pointers to use
+rcu_dereference_check (with either the trace or normal RCU locks held)
+with a common bpf_rcu_lock_held helper method.
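A sketch of what this enables: task local storage used from a sleepable LSM program. The map, value struct and hook are illustrative; note the .s section suffix marking the program sleepable:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    struct val {
            __u64 opens;
    };

    struct {
            __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
            __uint(map_flags, BPF_F_NO_PREALLOC);
            __type(key, int);
            __type(value, struct val);
    } task_map SEC(".maps");

    SEC("lsm.s/file_open")
    int BPF_PROG(count_opens, struct file *file)
    {
            struct val *v;

            v = bpf_task_storage_get(&task_map, bpf_get_current_task_btf(),
                                     0, BPF_LOCAL_STORAGE_GET_F_CREATE);
            if (v)
                    v->opens++;
            return 0;
    }

    char _license[] SEC("license") = "GPL";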
+ +Signed-off-by: KP Singh +Signed-off-by: Alexei Starovoitov +Acked-by: Martin KaFai Lau +Link: https://lore.kernel.org/bpf/20211224152916.1550677-2-kpsingh@kernel.org +Acked-by: Shung-Hsi Yu +--- + include/linux/bpf_local_storage.h | 5 +++ + kernel/bpf/bpf_inode_storage.c | 6 +++- + kernel/bpf/bpf_local_storage.c | 50 ++++++++++++++++++++++++++++---------- + kernel/bpf/bpf_task_storage.c | 6 +++- + kernel/bpf/verifier.c | 3 ++ + net/core/bpf_sk_storage.c | 8 +++++- + 6 files changed, 62 insertions(+), 16 deletions(-) + +--- a/include/linux/bpf_local_storage.h ++++ b/include/linux/bpf_local_storage.h +@@ -17,6 +17,9 @@ + + #define BPF_LOCAL_STORAGE_CACHE_SIZE 16 + ++#define bpf_rcu_lock_held() \ ++ (rcu_read_lock_held() || rcu_read_lock_trace_held() || \ ++ rcu_read_lock_bh_held()) + struct bpf_local_storage_map_bucket { + struct hlist_head list; + raw_spinlock_t lock; +@@ -162,4 +165,6 @@ struct bpf_local_storage_data * + bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, + void *value, u64 map_flags); + ++void bpf_local_storage_free_rcu(struct rcu_head *rcu); ++ + #endif /* _BPF_LOCAL_STORAGE_H */ +--- a/kernel/bpf/bpf_inode_storage.c ++++ b/kernel/bpf/bpf_inode_storage.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + DEFINE_BPF_STORAGE_CACHE(inode_cache); + +@@ -44,7 +45,8 @@ static struct bpf_local_storage_data *in + if (!bsb) + return NULL; + +- inode_storage = rcu_dereference(bsb->storage); ++ inode_storage = ++ rcu_dereference_check(bsb->storage, bpf_rcu_lock_held()); + if (!inode_storage) + return NULL; + +@@ -172,6 +174,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct + { + struct bpf_local_storage_data *sdata; + ++ WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) + return (unsigned long)NULL; + +@@ -204,6 +207,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct + BPF_CALL_2(bpf_inode_storage_delete, + struct bpf_map *, map, struct inode *, inode) + { ++ WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (!inode) + return -EINVAL; + +--- a/kernel/bpf/bpf_local_storage.c ++++ b/kernel/bpf/bpf_local_storage.c +@@ -11,6 +11,9 @@ + #include + #include + #include ++#include ++#include ++#include + + #define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE) + +@@ -81,6 +84,22 @@ bpf_selem_alloc(struct bpf_local_storage + return NULL; + } + ++void bpf_local_storage_free_rcu(struct rcu_head *rcu) ++{ ++ struct bpf_local_storage *local_storage; ++ ++ local_storage = container_of(rcu, struct bpf_local_storage, rcu); ++ kfree_rcu(local_storage, rcu); ++} ++ ++static void bpf_selem_free_rcu(struct rcu_head *rcu) ++{ ++ struct bpf_local_storage_elem *selem; ++ ++ selem = container_of(rcu, struct bpf_local_storage_elem, rcu); ++ kfree_rcu(selem, rcu); ++} ++ + /* local_storage->lock must be held and selem->local_storage == local_storage. + * The caller must ensure selem->smap is still valid to be + * dereferenced for its smap->elem_size and smap->cache_idx. +@@ -93,7 +112,7 @@ bool bpf_selem_unlink_storage_nolock(str + bool free_local_storage; + void *owner; + +- smap = rcu_dereference(SDATA(selem)->smap); ++ smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); + owner = local_storage->owner; + + /* All uncharging on the owner must be done first. 
+@@ -118,12 +137,12 @@ bool bpf_selem_unlink_storage_nolock(str + * + * Although the unlock will be done under + * rcu_read_lock(), it is more intutivie to +- * read if kfree_rcu(local_storage, rcu) is done ++ * read if the freeing of the storage is done + * after the raw_spin_unlock_bh(&local_storage->lock). + * + * Hence, a "bool free_local_storage" is returned +- * to the caller which then calls the kfree_rcu() +- * after unlock. ++ * to the caller which then calls then frees the storage after ++ * all the RCU grace periods have expired. + */ + } + hlist_del_init_rcu(&selem->snode); +@@ -131,8 +150,7 @@ bool bpf_selem_unlink_storage_nolock(str + SDATA(selem)) + RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); + +- kfree_rcu(selem, rcu); +- ++ call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu); + return free_local_storage; + } + +@@ -146,7 +164,8 @@ static void __bpf_selem_unlink_storage(s + /* selem has already been unlinked from sk */ + return; + +- local_storage = rcu_dereference(selem->local_storage); ++ local_storage = rcu_dereference_check(selem->local_storage, ++ bpf_rcu_lock_held()); + raw_spin_lock_irqsave(&local_storage->lock, flags); + if (likely(selem_linked_to_storage(selem))) + free_local_storage = bpf_selem_unlink_storage_nolock( +@@ -154,7 +173,8 @@ static void __bpf_selem_unlink_storage(s + raw_spin_unlock_irqrestore(&local_storage->lock, flags); + + if (free_local_storage) +- kfree_rcu(local_storage, rcu); ++ call_rcu_tasks_trace(&local_storage->rcu, ++ bpf_local_storage_free_rcu); + } + + void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, +@@ -174,7 +194,7 @@ void bpf_selem_unlink_map(struct bpf_loc + /* selem has already be unlinked from smap */ + return; + +- smap = rcu_dereference(SDATA(selem)->smap); ++ smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); + b = select_bucket(smap, selem); + raw_spin_lock_irqsave(&b->lock, flags); + if (likely(selem_linked_to_map(selem))) +@@ -213,12 +233,14 @@ bpf_local_storage_lookup(struct bpf_loca + struct bpf_local_storage_elem *selem; + + /* Fast path (cache hit) */ +- sdata = rcu_dereference(local_storage->cache[smap->cache_idx]); ++ sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx], ++ bpf_rcu_lock_held()); + if (sdata && rcu_access_pointer(sdata->smap) == smap) + return sdata; + + /* Slow path (cache miss) */ +- hlist_for_each_entry_rcu(selem, &local_storage->list, snode) ++ hlist_for_each_entry_rcu(selem, &local_storage->list, snode, ++ rcu_read_lock_trace_held()) + if (rcu_access_pointer(SDATA(selem)->smap) == smap) + break; + +@@ -306,7 +328,8 @@ int bpf_local_storage_alloc(void *owner, + * bucket->list, first_selem can be freed immediately + * (instead of kfree_rcu) because + * bpf_local_storage_map_free() does a +- * synchronize_rcu() before walking the bucket->list. ++ * synchronize_rcu_mult (waiting for both sleepable and ++ * normal programs) before walking the bucket->list. + * Hence, no one is accessing selem from the + * bucket->list under rcu_read_lock(). 
+ */ +@@ -342,7 +365,8 @@ bpf_local_storage_update(void *owner, st + !map_value_has_spin_lock(&smap->map))) + return ERR_PTR(-EINVAL); + +- local_storage = rcu_dereference(*owner_storage(smap, owner)); ++ local_storage = rcu_dereference_check(*owner_storage(smap, owner), ++ bpf_rcu_lock_held()); + if (!local_storage || hlist_empty(&local_storage->list)) { + /* Very first elem for the owner */ + err = check_flags(NULL, map_flags); +--- a/kernel/bpf/bpf_task_storage.c ++++ b/kernel/bpf/bpf_task_storage.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + DEFINE_BPF_STORAGE_CACHE(task_cache); + +@@ -59,7 +60,8 @@ task_storage_lookup(struct task_struct * + struct bpf_local_storage *task_storage; + struct bpf_local_storage_map *smap; + +- task_storage = rcu_dereference(task->bpf_storage); ++ task_storage = ++ rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held()); + if (!task_storage) + return NULL; + +@@ -229,6 +231,7 @@ BPF_CALL_4(bpf_task_storage_get, struct + { + struct bpf_local_storage_data *sdata; + ++ WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) + return (unsigned long)NULL; + +@@ -260,6 +263,7 @@ BPF_CALL_2(bpf_task_storage_delete, stru + { + int ret; + ++ WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (!task) + return -EINVAL; + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -11897,6 +11897,9 @@ static int check_map_prog_compatibility( + } + break; + case BPF_MAP_TYPE_RINGBUF: ++ case BPF_MAP_TYPE_INODE_STORAGE: ++ case BPF_MAP_TYPE_SK_STORAGE: ++ case BPF_MAP_TYPE_TASK_STORAGE: + break; + default: + verbose(env, +--- a/net/core/bpf_sk_storage.c ++++ b/net/core/bpf_sk_storage.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + DEFINE_BPF_STORAGE_CACHE(sk_cache); + +@@ -22,7 +23,8 @@ bpf_sk_storage_lookup(struct sock *sk, s + struct bpf_local_storage *sk_storage; + struct bpf_local_storage_map *smap; + +- sk_storage = rcu_dereference(sk->sk_bpf_storage); ++ sk_storage = ++ rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held()); + if (!sk_storage) + return NULL; + +@@ -258,6 +260,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bp + { + struct bpf_local_storage_data *sdata; + ++ WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE) + return (unsigned long)NULL; + +@@ -288,6 +291,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bp + + BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) + { ++ WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (!sk || !sk_fullsock(sk)) + return -EINVAL; + +@@ -416,6 +420,7 @@ static bool bpf_sk_storage_tracing_allow + BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk, + void *, value, u64, flags) + { ++ WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (in_hardirq() || in_nmi()) + return (unsigned long)NULL; + +@@ -425,6 +430,7 @@ BPF_CALL_4(bpf_sk_storage_get_tracing, s + BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map, + struct sock *, sk) + { ++ WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (in_hardirq() || in_nmi()) + return -EPERM; + diff --git a/patches.suse/bpf-Change-bpf_kallsyms_lookup_name-size-type-to-ARG.patch b/patches.suse/bpf-Change-bpf_kallsyms_lookup_name-size-type-to-ARG.patch new file mode 100644 index 0000000..6851a76 --- /dev/null +++ b/patches.suse/bpf-Change-bpf_kallsyms_lookup_name-size-type-to-ARG.patch @@ -0,0 +1,35 @@ +From: Kumar Kartikeya Dwivedi +Date: Tue, 23 Nov 2021 05:27:31 +0530 +Subject: bpf: Change bpf_kallsyms_lookup_name size type to + 
ARG_CONST_SIZE_OR_ZERO +Patch-mainline: v5.17-rc1 +Git-commit: d4efb170861827290f7f571020001a60d001faaf +References: jsc#PED-1368 + +Andrii mentioned in [0] that switching to ARG_CONST_SIZE_OR_ZERO lets +user avoid having to prove that string size at runtime is not zero and +helps with not having to supress clang optimizations. + + [0]: https://lore.kernel.org/bpf/CAEf4BzZa_vhXB3c8atNcTS6=krQvC25H7K7c3WWZhM=27ro=Wg@mail.gmail.com + +Suggested-by: Andrii Nakryiko +Signed-off-by: Kumar Kartikeya Dwivedi +Signed-off-by: Andrii Nakryiko +Acked-by: Song Liu +Link: https://lore.kernel.org/bpf/20211122235733.634914-2-memxor@gmail.com +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/syscall.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -4819,7 +4819,7 @@ const struct bpf_func_proto bpf_kallsyms + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, +- .arg2_type = ARG_CONST_SIZE, ++ .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_LONG, + }; diff --git a/patches.suse/bpf-Change-value-of-MAX_TAIL_CALL_CNT-from-32-to-33.patch b/patches.suse/bpf-Change-value-of-MAX_TAIL_CALL_CNT-from-32-to-33.patch new file mode 100644 index 0000000..e816f93 --- /dev/null +++ b/patches.suse/bpf-Change-value-of-MAX_TAIL_CALL_CNT-from-32-to-33.patch @@ -0,0 +1,383 @@ +From: Tiezhu Yang +Date: Fri, 5 Nov 2021 09:30:00 +0800 +Subject: bpf: Change value of MAX_TAIL_CALL_CNT from 32 to 33 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 +Git-commit: ebf7f6f0a6cdcc17a3da52b81e4b3a98c4005028 +References: jsc#PED-1368 + +In the current code, the actual max tail call count is 33 which is greater +than MAX_TAIL_CALL_CNT (defined as 32). The actual limit is not consistent +with the meaning of MAX_TAIL_CALL_CNT and thus confusing at first glance. +We can see the historical evolution from commit 04fd61ab36ec ("bpf: allow +bpf programs to tail-call other bpf programs") and commit f9dabe016b63 +("bpf: Undo off-by-one in interpreter tail call count limit"). In order +to avoid changing existing behavior, the actual limit is 33 now, this is +reasonable. + +After commit 874be05f525e ("bpf, tests: Add tail call test suite"), we can +see there exists failed testcase. + +On all archs when CONFIG_BPF_JIT_ALWAYS_ON is not set: + # echo 0 > /proc/sys/net/core/bpf_jit_enable + # modprobe test_bpf + # dmesg | grep -w FAIL + Tail call error path, max count reached jited:0 ret 34 != 33 FAIL + +On some archs: + # echo 1 > /proc/sys/net/core/bpf_jit_enable + # modprobe test_bpf + # dmesg | grep -w FAIL + Tail call error path, max count reached jited:1 ret 34 != 33 FAIL + +Although the above failed testcase has been fixed in commit 18935a72eb25 +("bpf/tests: Fix error in tail call limit tests"), it would still be good +to change the value of MAX_TAIL_CALL_CNT from 32 to 33 to make the code +more readable. + +The 32-bit x86 JIT was using a limit of 32, just fix the wrong comments and +limit to 33 tail calls as the constant MAX_TAIL_CALL_CNT updated. For the +mips64 JIT, use "ori" instead of "addiu" as suggested by Johan Almbladh. +For the riscv JIT, use RV_REG_TCC directly to save one register move as +suggested by Björn Töpel. 
For the other implementations, no function changes, +it does not change the current limit 33, the new value of MAX_TAIL_CALL_CNT +can reflect the actual max tail call count, the related tail call testcases +in test_bpf module and selftests can work well for the interpreter and the +JIT. + +Here are the test results on x86_64: + + # uname -m + x86_64 + # echo 0 > /proc/sys/net/core/bpf_jit_enable + # modprobe test_bpf test_suite=test_tail_calls + # dmesg | tail -1 + test_bpf: test_tail_calls: Summary: 8 PASSED, 0 FAILED, [0/8 JIT'ed] + # rmmod test_bpf + # echo 1 > /proc/sys/net/core/bpf_jit_enable + # modprobe test_bpf test_suite=test_tail_calls + # dmesg | tail -1 + test_bpf: test_tail_calls: Summary: 8 PASSED, 0 FAILED, [8/8 JIT'ed] + # rmmod test_bpf + # ./test_progs -t tailcalls + #142 tailcalls:OK + Summary: 1/11 PASSED, 0 SKIPPED, 0 FAILED + +Signed-off-by: Tiezhu Yang +Signed-off-by: Daniel Borkmann +Tested-by: Johan Almbladh +Tested-by: Ilya Leoshkevich +Acked-by: Björn Töpel +Acked-by: Johan Almbladh +Acked-by: Ilya Leoshkevich +Link: https://lore.kernel.org/bpf/1636075800-3264-1-git-send-email-yangtiezhu@loongson.cn +Acked-by: Shung-Hsi Yu +--- + arch/arm/net/bpf_jit_32.c | 5 +++-- + arch/arm64/net/bpf_jit_comp.c | 5 +++-- + arch/powerpc/net/bpf_jit_comp32.c | 4 ++-- + arch/powerpc/net/bpf_jit_comp64.c | 4 ++-- + arch/riscv/net/bpf_jit_comp32.c | 6 ++---- + arch/riscv/net/bpf_jit_comp64.c | 7 +++---- + arch/s390/net/bpf_jit_comp.c | 6 +++--- + arch/sparc/net/bpf_jit_comp_64.c | 2 +- + arch/x86/net/bpf_jit_comp.c | 10 +++++----- + arch/x86/net/bpf_jit_comp32.c | 4 ++-- + include/linux/bpf.h | 2 +- + include/uapi/linux/bpf.h | 2 +- + kernel/bpf/core.c | 3 ++- + lib/test_bpf.c | 4 ++-- + tools/include/uapi/linux/bpf.h | 2 +- + 15 files changed, 33 insertions(+), 33 deletions(-) + +--- a/arch/arm/net/bpf_jit_32.c ++++ b/arch/arm/net/bpf_jit_32.c +@@ -1199,7 +1199,8 @@ static int emit_bpf_tail_call(struct jit + + /* tmp2[0] = array, tmp2[1] = index */ + +- /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) ++ /* ++ * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) + * goto out; + * tail_call_cnt++; + */ +@@ -1208,7 +1209,7 @@ static int emit_bpf_tail_call(struct jit + tc = arm_bpf_get_reg64(tcc, tmp, ctx); + emit(ARM_CMP_I(tc[0], hi), ctx); + _emit(ARM_COND_EQ, ARM_CMP_I(tc[1], lo), ctx); +- _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); ++ _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); + emit(ARM_ADDS_I(tc[1], tc[1], 1), ctx); + emit(ARM_ADC_I(tc[0], tc[0], 0), ctx); + arm_bpf_put_reg64(tcc, tmp, ctx); +--- a/arch/arm64/net/bpf_jit_comp.c ++++ b/arch/arm64/net/bpf_jit_comp.c +@@ -286,13 +286,14 @@ static int emit_bpf_tail_call(struct jit + emit(A64_CMP(0, r3, tmp), ctx); + emit(A64_B_(A64_COND_CS, jmp_offset), ctx); + +- /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) ++ /* ++ * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) + * goto out; + * tail_call_cnt++; + */ + emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); + emit(A64_CMP(1, tcc, tmp), ctx); +- emit(A64_B_(A64_COND_HI, jmp_offset), ctx); ++ emit(A64_B_(A64_COND_CS, jmp_offset), ctx); + emit(A64_ADD_I(1, tcc, tcc, 1), ctx); + + /* prog = array->ptrs[index]; +--- a/arch/powerpc/net/bpf_jit_comp32.c ++++ b/arch/powerpc/net/bpf_jit_comp32.c +@@ -221,13 +221,13 @@ static int bpf_jit_emit_tail_call(u32 *i + PPC_BCC(COND_GE, out); + + /* +- * if (tail_call_cnt > MAX_TAIL_CALL_CNT) ++ * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) + * goto out; + */ + EMIT(PPC_RAW_CMPLWI(_R0, MAX_TAIL_CALL_CNT)); + /* tail_call_cnt++; */ + EMIT(PPC_RAW_ADDIC(_R0, _R0, 1)); +- PPC_BCC(COND_GT, 
out); ++ PPC_BCC(COND_GE, out); + + /* prog = array->ptrs[index]; */ + EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29)); +--- a/arch/powerpc/net/bpf_jit_comp64.c ++++ b/arch/powerpc/net/bpf_jit_comp64.c +@@ -228,12 +228,12 @@ static int bpf_jit_emit_tail_call(u32 *i + PPC_BCC(COND_GE, out); + + /* +- * if (tail_call_cnt > MAX_TAIL_CALL_CNT) ++ * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) + * goto out; + */ + PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); + EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); +- PPC_BCC(COND_GT, out); ++ PPC_BCC(COND_GE, out); + + /* + * tail_call_cnt++; +--- a/arch/riscv/net/bpf_jit_comp32.c ++++ b/arch/riscv/net/bpf_jit_comp32.c +@@ -799,11 +799,10 @@ static int emit_bpf_tail_call(int insn, + emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx); + + /* +- * temp_tcc = tcc - 1; +- * if (tcc < 0) ++ * if (--tcc < 0) + * goto out; + */ +- emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx); ++ emit(rv_addi(RV_REG_TCC, RV_REG_TCC, -1), ctx); + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); + emit_bcc(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx); + +@@ -829,7 +828,6 @@ static int emit_bpf_tail_call(int insn, + if (is_12b_check(off, insn)) + return -1; + emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx); +- emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx); + /* Epilogue jumps to *(t0 + 4). */ + __build_epilogue(true, ctx); + return 0; +--- a/arch/riscv/net/bpf_jit_comp64.c ++++ b/arch/riscv/net/bpf_jit_comp64.c +@@ -327,12 +327,12 @@ static int emit_bpf_tail_call(int insn, + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); + emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx); + +- /* if (TCC-- < 0) ++ /* if (--TCC < 0) + * goto out; + */ +- emit_addi(RV_REG_T1, tcc, -1, ctx); ++ emit_addi(RV_REG_TCC, tcc, -1, ctx); + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); +- emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx); ++ emit_branch(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx); + + /* prog = array->ptrs[index]; + * if (!prog) +@@ -352,7 +352,6 @@ static int emit_bpf_tail_call(int insn, + if (is_12b_check(off, insn)) + return -1; + emit_ld(RV_REG_T3, off, RV_REG_T2, ctx); +- emit_mv(RV_REG_TCC, RV_REG_T1, ctx); + __build_epilogue(true, ctx); + return 0; + } +--- a/arch/s390/net/bpf_jit_comp.c ++++ b/arch/s390/net/bpf_jit_comp.c +@@ -1369,7 +1369,7 @@ static noinline int bpf_jit_insn(struct + jit->prg); + + /* +- * if (tail_call_cnt++ > MAX_TAIL_CALL_CNT) ++ * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) + * goto out; + */ + +@@ -1381,9 +1381,9 @@ static noinline int bpf_jit_insn(struct + EMIT4_IMM(0xa7080000, REG_W0, 1); + /* laal %w1,%w0,off(%r15) */ + EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off); +- /* clij %w1,MAX_TAIL_CALL_CNT,0x2,out */ ++ /* clij %w1,MAX_TAIL_CALL_CNT-1,0x2,out */ + patch_2_clij = jit->prg; +- EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT, ++ EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT - 1, + 2, jit->prg); + + /* +--- a/arch/sparc/net/bpf_jit_comp_64.c ++++ b/arch/sparc/net/bpf_jit_comp_64.c +@@ -867,7 +867,7 @@ static void emit_tail_call(struct jit_ct + emit(LD32 | IMMED | RS1(SP) | S13(off) | RD(tmp), ctx); + emit_cmpi(tmp, MAX_TAIL_CALL_CNT, ctx); + #define OFFSET2 13 +- emit_branch(BGU, ctx->idx, ctx->idx + OFFSET2, ctx); ++ emit_branch(BGEU, ctx->idx, ctx->idx + OFFSET2, ctx); + emit_nop(ctx); + + emit_alu_K(ADD, tmp, 1, ctx); +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -412,7 +412,7 @@ static void emit_indirect_jump(u8 
**ppro + * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... + * if (index >= array->map.max_entries) + * goto out; +- * if (++tail_call_cnt > MAX_TAIL_CALL_CNT) ++ * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) + * goto out; + * prog = array->ptrs[index]; + * if (prog == NULL) +@@ -446,14 +446,14 @@ static void emit_bpf_tail_call_indirect( + EMIT2(X86_JBE, offset); /* jbe out */ + + /* +- * if (tail_call_cnt > MAX_TAIL_CALL_CNT) ++ * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) + * goto out; + */ + EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ + EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ + + offset = ctx->tail_call_indirect_label - (prog + 2 - start); +- EMIT2(X86_JA, offset); /* ja out */ ++ EMIT2(X86_JAE, offset); /* jae out */ + EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ + EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ + +@@ -504,14 +504,14 @@ static void emit_bpf_tail_call_direct(st + int offset; + + /* +- * if (tail_call_cnt > MAX_TAIL_CALL_CNT) ++ * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) + * goto out; + */ + EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ + EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ + + offset = ctx->tail_call_direct_label - (prog + 2 - start); +- EMIT2(X86_JA, offset); /* ja out */ ++ EMIT2(X86_JAE, offset); /* jae out */ + EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ + EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ + +--- a/arch/x86/net/bpf_jit_comp32.c ++++ b/arch/x86/net/bpf_jit_comp32.c +@@ -1323,7 +1323,7 @@ static void emit_bpf_tail_call(u8 **ppro + EMIT2(IA32_JBE, jmp_label(jmp_label1, 2)); + + /* +- * if (tail_call_cnt > MAX_TAIL_CALL_CNT) ++ * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) + * goto out; + */ + lo = (u32)MAX_TAIL_CALL_CNT; +@@ -1337,7 +1337,7 @@ static void emit_bpf_tail_call(u8 **ppro + /* cmp ecx,lo */ + EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo); + +- /* ja out */ ++ /* jae out */ + EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); + + /* add eax,0x1 */ +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -1083,7 +1083,7 @@ struct bpf_array { + }; + + #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ +-#define MAX_TAIL_CALL_CNT 32 ++#define MAX_TAIL_CALL_CNT 33 + + #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ + BPF_F_RDONLY_PROG | \ +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -1744,7 +1744,7 @@ union bpf_attr { + * if the maximum number of tail calls has been reached for this + * chain of programs. This limit is defined in the kernel by the + * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), +- * which is currently set to 32. ++ * which is currently set to 33. + * Return + * 0 on success, or a negative error in case of failure. 
+ * +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -1574,7 +1574,8 @@ select_insn: + + if (unlikely(index >= array->map.max_entries)) + goto out; +- if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT)) ++ ++ if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT)) + goto out; + + tail_call_cnt++; +--- a/lib/test_bpf.c ++++ b/lib/test_bpf.c +@@ -14742,7 +14742,7 @@ static struct tail_call_test tail_call_t + BPF_EXIT_INSN(), + }, + .flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE, +- .result = (MAX_TAIL_CALL_CNT + 1 + 1) * MAX_TESTRUNS, ++ .result = (MAX_TAIL_CALL_CNT + 1) * MAX_TESTRUNS, + }, + { + "Tail call count preserved across function calls", +@@ -14764,7 +14764,7 @@ static struct tail_call_test tail_call_t + }, + .stack_depth = 8, + .flags = FLAG_NEED_STATE | FLAG_RESULT_IN_STATE, +- .result = (MAX_TAIL_CALL_CNT + 1 + 1) * MAX_TESTRUNS, ++ .result = (MAX_TAIL_CALL_CNT + 1) * MAX_TESTRUNS, + }, + { + "Tail call error path, NULL target", +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -1747,7 +1747,7 @@ union bpf_attr { + * if the maximum number of tail calls has been reached for this + * chain of programs. This limit is defined in the kernel by the + * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), +- * which is currently set to 32. ++ * which is currently set to 33. + * Return + * 0 on success, or a negative error in case of failure. + * diff --git a/patches.suse/bpf-Clean-up-bpf_verifier_vlog-for-BPF_LOG_KERNEL-lo.patch b/patches.suse/bpf-Clean-up-bpf_verifier_vlog-for-BPF_LOG_KERNEL-lo.patch new file mode 100644 index 0000000..fecfa57 --- /dev/null +++ b/patches.suse/bpf-Clean-up-bpf_verifier_vlog-for-BPF_LOG_KERNEL-lo.patch @@ -0,0 +1,55 @@ +From: Hou Tao +Date: Wed, 1 Dec 2021 15:34:57 +0800 +Subject: bpf: Clean-up bpf_verifier_vlog() for BPF_LOG_KERNEL log level +Patch-mainline: v5.17-rc1 +Git-commit: 436d404cc8ff573a417cb3b6a5c76655121aceac +References: jsc#PED-1368 + +An extra newline will output for bpf_log() with BPF_LOG_KERNEL level +as shown below: + +[ 52.095704] BPF:The function test_3 has 12 arguments. Too many. +[ 52.095704] +[ 52.096896] Error in parsing func ptr test_3 in struct bpf_dummy_ops + +Now all bpf_log() are ended by newline, but not all btf_verifier_log() +are ended by newline, so checking whether or not the log message +has the trailing newline and adding a newline if not. + +Also there is no need to calculate the left userspace buffer size +for kernel log output and to truncate the output by '\0' which +has already been done by vscnprintf(), so only do these for +userspace log output. + +Signed-off-by: Hou Tao +Signed-off-by: Alexei Starovoitov +Acked-by: Yonghong Song +Acked-by: Martin KaFai Lau +Link: https://lore.kernel.org/bpf/20211201073458.2731595-2-houtao1@huawei.com +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/verifier.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -293,13 +293,15 @@ void bpf_verifier_vlog(struct bpf_verifi + WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, + "verifier log line truncated - local buffer too short\n"); + +- n = min(log->len_total - log->len_used - 1, n); +- log->kbuf[n] = '\0'; +- + if (log->level == BPF_LOG_KERNEL) { +- pr_err("BPF:%s\n", log->kbuf); ++ bool newline = n > 0 && log->kbuf[n - 1] == '\n'; ++ ++ pr_err("BPF: %s%s", log->kbuf, newline ? 
"" : "\n"); + return; + } ++ ++ n = min(log->len_total - log->len_used - 1, n); ++ log->kbuf[n] = '\0'; + if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) + log->len_used += n; + else diff --git a/patches.suse/bpf-Define-enum-bpf_core_relo_kind-as-uapi.patch b/patches.suse/bpf-Define-enum-bpf_core_relo_kind-as-uapi.patch new file mode 100644 index 0000000..af8d151 --- /dev/null +++ b/patches.suse/bpf-Define-enum-bpf_core_relo_kind-as-uapi.patch @@ -0,0 +1,323 @@ +From: Alexei Starovoitov +Date: Wed, 1 Dec 2021 10:10:27 -0800 +Subject: bpf: Define enum bpf_core_relo_kind as uapi. +Patch-mainline: v5.17-rc1 +Git-commit: 46334a0cd21bed70d6f1ddef1464f75a0ebe1774 +References: jsc#PED-1368 + +enum bpf_core_relo_kind is generated by llvm and processed by libbpf. +It's a de-facto uapi. +With CO-RE in the kernel the bpf_core_relo_kind values become uapi de-jure. +Also rename them with BPF_CORE_ prefix to distinguish from conflicting names in +bpf_core_read.h. The enums bpf_field_info_kind, bpf_type_id_kind, +bpf_type_info_kind, bpf_enum_value_kind are passing different values from bpf +program into llvm. + +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-5-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + include/uapi/linux/bpf.h | 19 +++++++++ + tools/include/uapi/linux/bpf.h | 19 +++++++++ + tools/lib/bpf/libbpf.c | 2 + tools/lib/bpf/relo_core.c | 84 ++++++++++++++++++++--------------------- + tools/lib/bpf/relo_core.h | 18 -------- + 5 files changed, 82 insertions(+), 60 deletions(-) + +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -6374,4 +6374,23 @@ enum { + BTF_F_ZERO = (1ULL << 3), + }; + ++/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value ++ * has to be adjusted by relocations. It is emitted by llvm and passed to ++ * libbpf and later to the kernel. ++ */ ++enum bpf_core_relo_kind { ++ BPF_CORE_FIELD_BYTE_OFFSET = 0, /* field byte offset */ ++ BPF_CORE_FIELD_BYTE_SIZE = 1, /* field size in bytes */ ++ BPF_CORE_FIELD_EXISTS = 2, /* field existence in target kernel */ ++ BPF_CORE_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ ++ BPF_CORE_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ ++ BPF_CORE_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ ++ BPF_CORE_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ ++ BPF_CORE_TYPE_ID_TARGET = 7, /* type ID in target kernel */ ++ BPF_CORE_TYPE_EXISTS = 8, /* type existence in target kernel */ ++ BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */ ++ BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ ++ BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ ++}; ++ + #endif /* _UAPI__LINUX_BPF_H__ */ +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -6377,4 +6377,23 @@ enum { + BTF_F_ZERO = (1ULL << 3), + }; + ++/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value ++ * has to be adjusted by relocations. It is emitted by llvm and passed to ++ * libbpf and later to the kernel. 
++ */ ++enum bpf_core_relo_kind { ++ BPF_CORE_FIELD_BYTE_OFFSET = 0, /* field byte offset */ ++ BPF_CORE_FIELD_BYTE_SIZE = 1, /* field size in bytes */ ++ BPF_CORE_FIELD_EXISTS = 2, /* field existence in target kernel */ ++ BPF_CORE_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ ++ BPF_CORE_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ ++ BPF_CORE_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ ++ BPF_CORE_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ ++ BPF_CORE_TYPE_ID_TARGET = 7, /* type ID in target kernel */ ++ BPF_CORE_TYPE_EXISTS = 8, /* type existence in target kernel */ ++ BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */ ++ BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ ++ BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ ++}; ++ + #endif /* _UAPI__LINUX_BPF_H__ */ +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -5523,7 +5523,7 @@ static int bpf_core_apply_relo(struct bp + return -ENOTSUP; + } + +- if (relo->kind != BPF_TYPE_ID_LOCAL && ++ if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && + !hashmap__find(cand_cache, type_key, (void **)&cands)) { + cands = bpf_core_find_cands(prog->obj, local_btf, local_id); + if (IS_ERR(cands)) { +--- a/tools/lib/bpf/relo_core.c ++++ b/tools/lib/bpf/relo_core.c +@@ -113,18 +113,18 @@ static bool is_flex_arr(const struct btf + static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) + { + switch (kind) { +- case BPF_FIELD_BYTE_OFFSET: return "byte_off"; +- case BPF_FIELD_BYTE_SIZE: return "byte_sz"; +- case BPF_FIELD_EXISTS: return "field_exists"; +- case BPF_FIELD_SIGNED: return "signed"; +- case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; +- case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; +- case BPF_TYPE_ID_LOCAL: return "local_type_id"; +- case BPF_TYPE_ID_TARGET: return "target_type_id"; +- case BPF_TYPE_EXISTS: return "type_exists"; +- case BPF_TYPE_SIZE: return "type_size"; +- case BPF_ENUMVAL_EXISTS: return "enumval_exists"; +- case BPF_ENUMVAL_VALUE: return "enumval_value"; ++ case BPF_CORE_FIELD_BYTE_OFFSET: return "byte_off"; ++ case BPF_CORE_FIELD_BYTE_SIZE: return "byte_sz"; ++ case BPF_CORE_FIELD_EXISTS: return "field_exists"; ++ case BPF_CORE_FIELD_SIGNED: return "signed"; ++ case BPF_CORE_FIELD_LSHIFT_U64: return "lshift_u64"; ++ case BPF_CORE_FIELD_RSHIFT_U64: return "rshift_u64"; ++ case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id"; ++ case BPF_CORE_TYPE_ID_TARGET: return "target_type_id"; ++ case BPF_CORE_TYPE_EXISTS: return "type_exists"; ++ case BPF_CORE_TYPE_SIZE: return "type_size"; ++ case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists"; ++ case BPF_CORE_ENUMVAL_VALUE: return "enumval_value"; + default: return "unknown"; + } + } +@@ -132,12 +132,12 @@ static const char *core_relo_kind_str(en + static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) + { + switch (kind) { +- case BPF_FIELD_BYTE_OFFSET: +- case BPF_FIELD_BYTE_SIZE: +- case BPF_FIELD_EXISTS: +- case BPF_FIELD_SIGNED: +- case BPF_FIELD_LSHIFT_U64: +- case BPF_FIELD_RSHIFT_U64: ++ case BPF_CORE_FIELD_BYTE_OFFSET: ++ case BPF_CORE_FIELD_BYTE_SIZE: ++ case BPF_CORE_FIELD_EXISTS: ++ case BPF_CORE_FIELD_SIGNED: ++ case BPF_CORE_FIELD_LSHIFT_U64: ++ case BPF_CORE_FIELD_RSHIFT_U64: + return true; + default: + return false; +@@ -147,10 +147,10 @@ static bool core_relo_is_field_based(enu + static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) + { + switch (kind) { +- case BPF_TYPE_ID_LOCAL: +- case BPF_TYPE_ID_TARGET: +- case 
BPF_TYPE_EXISTS: +- case BPF_TYPE_SIZE: ++ case BPF_CORE_TYPE_ID_LOCAL: ++ case BPF_CORE_TYPE_ID_TARGET: ++ case BPF_CORE_TYPE_EXISTS: ++ case BPF_CORE_TYPE_SIZE: + return true; + default: + return false; +@@ -160,8 +160,8 @@ static bool core_relo_is_type_based(enum + static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) + { + switch (kind) { +- case BPF_ENUMVAL_EXISTS: +- case BPF_ENUMVAL_VALUE: ++ case BPF_CORE_ENUMVAL_EXISTS: ++ case BPF_CORE_ENUMVAL_VALUE: + return true; + default: + return false; +@@ -624,7 +624,7 @@ static int bpf_core_calc_field_relo(cons + + *field_sz = 0; + +- if (relo->kind == BPF_FIELD_EXISTS) { ++ if (relo->kind == BPF_CORE_FIELD_EXISTS) { + *val = spec ? 1 : 0; + return 0; + } +@@ -637,7 +637,7 @@ static int bpf_core_calc_field_relo(cons + + /* a[n] accessor needs special handling */ + if (!acc->name) { +- if (relo->kind == BPF_FIELD_BYTE_OFFSET) { ++ if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) { + *val = spec->bit_offset / 8; + /* remember field size for load/store mem size */ + sz = btf__resolve_size(spec->btf, acc->type_id); +@@ -645,7 +645,7 @@ static int bpf_core_calc_field_relo(cons + return -EINVAL; + *field_sz = sz; + *type_id = acc->type_id; +- } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { ++ } else if (relo->kind == BPF_CORE_FIELD_BYTE_SIZE) { + sz = btf__resolve_size(spec->btf, acc->type_id); + if (sz < 0) + return -EINVAL; +@@ -697,36 +697,36 @@ static int bpf_core_calc_field_relo(cons + *validate = !bitfield; + + switch (relo->kind) { +- case BPF_FIELD_BYTE_OFFSET: ++ case BPF_CORE_FIELD_BYTE_OFFSET: + *val = byte_off; + if (!bitfield) { + *field_sz = byte_sz; + *type_id = field_type_id; + } + break; +- case BPF_FIELD_BYTE_SIZE: ++ case BPF_CORE_FIELD_BYTE_SIZE: + *val = byte_sz; + break; +- case BPF_FIELD_SIGNED: ++ case BPF_CORE_FIELD_SIGNED: + /* enums will be assumed unsigned */ + *val = btf_is_enum(mt) || + (btf_int_encoding(mt) & BTF_INT_SIGNED); + if (validate) + *validate = true; /* signedness is never ambiguous */ + break; +- case BPF_FIELD_LSHIFT_U64: ++ case BPF_CORE_FIELD_LSHIFT_U64: + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + *val = 64 - (bit_off + bit_sz - byte_off * 8); + #else + *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); + #endif + break; +- case BPF_FIELD_RSHIFT_U64: ++ case BPF_CORE_FIELD_RSHIFT_U64: + *val = 64 - bit_sz; + if (validate) + *validate = true; /* right shift is never ambiguous */ + break; +- case BPF_FIELD_EXISTS: ++ case BPF_CORE_FIELD_EXISTS: + default: + return -EOPNOTSUPP; + } +@@ -747,20 +747,20 @@ static int bpf_core_calc_type_relo(const + } + + switch (relo->kind) { +- case BPF_TYPE_ID_TARGET: ++ case BPF_CORE_TYPE_ID_TARGET: + *val = spec->root_type_id; + break; +- case BPF_TYPE_EXISTS: ++ case BPF_CORE_TYPE_EXISTS: + *val = 1; + break; +- case BPF_TYPE_SIZE: ++ case BPF_CORE_TYPE_SIZE: + sz = btf__resolve_size(spec->btf, spec->root_type_id); + if (sz < 0) + return -EINVAL; + *val = sz; + break; +- case BPF_TYPE_ID_LOCAL: +- /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ ++ case BPF_CORE_TYPE_ID_LOCAL: ++ /* BPF_CORE_TYPE_ID_LOCAL is handled specially and shouldn't get here */ + default: + return -EOPNOTSUPP; + } +@@ -776,10 +776,10 @@ static int bpf_core_calc_enumval_relo(co + const struct btf_enum *e; + + switch (relo->kind) { +- case BPF_ENUMVAL_EXISTS: ++ case BPF_CORE_ENUMVAL_EXISTS: + *val = spec ? 
1 : 0; + break; +- case BPF_ENUMVAL_VALUE: ++ case BPF_CORE_ENUMVAL_VALUE: + if (!spec) + return -EUCLEAN; /* request instruction poisoning */ + t = btf_type_by_id(spec->btf, spec->spec[0].type_id); +@@ -1236,7 +1236,7 @@ int bpf_core_apply_relo_insn(const char + libbpf_print(LIBBPF_DEBUG, "\n"); + + /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ +- if (relo->kind == BPF_TYPE_ID_LOCAL) { ++ if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { + targ_res.validate = true; + targ_res.poison = false; + targ_res.orig_val = local_spec.root_type_id; +@@ -1302,7 +1302,7 @@ int bpf_core_apply_relo_insn(const char + } + + /* +- * For BPF_FIELD_EXISTS relo or when used BPF program has field ++ * For BPF_CORE_FIELD_EXISTS relo or when used BPF program has field + * existence checks or kernel version/config checks, it's expected + * that we might not find any candidates. In this case, if field + * wasn't found in any candidate, the list of candidates shouldn't +--- a/tools/lib/bpf/relo_core.h ++++ b/tools/lib/bpf/relo_core.h +@@ -4,23 +4,7 @@ + #ifndef __RELO_CORE_H + #define __RELO_CORE_H + +-/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value +- * has to be adjusted by relocations. +- */ +-enum bpf_core_relo_kind { +- BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ +- BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */ +- BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ +- BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ +- BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ +- BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ +- BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ +- BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */ +- BPF_TYPE_EXISTS = 8, /* type existence in target kernel */ +- BPF_TYPE_SIZE = 9, /* type size in bytes */ +- BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ +- BPF_ENUMVAL_VALUE = 11, /* enum value integer value */ +-}; ++#include + + /* The minimum bpf_core_relo checked by the loader + * diff --git a/patches.suse/bpf-Do-not-try-bpf_msg_push_data-with-len-0.patch b/patches.suse/bpf-Do-not-try-bpf_msg_push_data-with-len-0.patch new file mode 100644 index 0000000..c7c6821 --- /dev/null +++ b/patches.suse/bpf-Do-not-try-bpf_msg_push_data-with-len-0.patch @@ -0,0 +1,41 @@ +From: Felix Maurer +Date: Wed, 9 Feb 2022 16:55:26 +0100 +Subject: bpf: Do not try bpf_msg_push_data with len 0 +Patch-mainline: v5.17-rc6 +Git-commit: 4a11678f683814df82fca9018d964771e02d7e6d +References: jsc#PED-1368 + +If bpf_msg_push_data() is called with len 0 (as it happens during +selftests/bpf/test_sockmap), we do not need to do anything and can +return early. + +Calling bpf_msg_push_data() with len 0 previously lead to a wrong ENOMEM +error: we later called get_order(copy + len); if len was 0, copy + len +was also often 0 and get_order() returned some undefined value (at the +moment 52). alloc_pages() caught that and failed, but then bpf_msg_push_data() +returned ENOMEM. This was wrong because we are most probably not out of +memory and actually do not need any additional memory. 
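+
+As a rough illustration (assuming 4K pages and the generic get_order()
+implementation), the bogus order for a size of 0 comes from unsigned
+wraparound:
+
+        unsigned long size = 0;
+
+        size--;                 /* wraps to ULONG_MAX */
+        size >>= PAGE_SHIFT;    /* 2^52 - 1 on 64-bit */
+        order = fls64(size);    /* = 52, an impossible allocation order */
+
+alloc_pages() can only fail for such an order, hence the misleading
+ENOMEM.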
+ +Fixes: 6fff607e2f14b ("bpf: sk_msg program helper bpf_msg_push_data") +Signed-off-by: Felix Maurer +Signed-off-by: Daniel Borkmann +Acked-by: Yonghong Song +Acked-by: John Fastabend +Link: https://lore.kernel.org/bpf/df69012695c7094ccb1943ca02b4920db3537466.1644421921.git.fmaurer@redhat.com +Acked-by: Shung-Hsi Yu +--- + net/core/filter.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -2727,6 +2727,9 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_ + if (unlikely(flags)) + return -EINVAL; + ++ if (unlikely(len == 0)) ++ return 0; ++ + /* First find the starting scatterlist element */ + i = msg->sg.start; + do { diff --git a/patches.suse/bpf-Document-BPF-licensing.patch b/patches.suse/bpf-Document-BPF-licensing.patch new file mode 100644 index 0000000..b9ac95c --- /dev/null +++ b/patches.suse/bpf-Document-BPF-licensing.patch @@ -0,0 +1,144 @@ +From: Alexei Starovoitov +Date: Fri, 17 Sep 2021 16:00:34 -0700 +Subject: bpf: Document BPF licensing. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.16-rc1 +Git-commit: c86216bc96aa2a61ee5248d99d0bd15e69cf52d1 +References: jsc#PED-1368 + +Document and clarify BPF licensing. + +Signed-off-by: Alexei Starovoitov +Signed-off-by: Daniel Borkmann +Reviewed-by: Simon Horman +Acked-by: Toke Høiland-Jørgensen +Acked-by: Daniel Borkmann +Acked-by: Joe Stringer +Acked-by: Lorenz Bauer +Acked-by: Dave Thaler +Acked-by: Stephen Hemminger +Acked-by: Jesper Dangaard Brouer +Acked-by: KP Singh +Link: https://lore.kernel.org/bpf/20210917230034.51080-1-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/bpf_licensing.rst | 92 ++++++++++++++++++++++++++++++++++++ + Documentation/bpf/index.rst | 9 +++ + 2 files changed, 101 insertions(+) + create mode 100644 Documentation/bpf/bpf_licensing.rst + +--- /dev/null ++++ b/Documentation/bpf/bpf_licensing.rst +@@ -0,0 +1,92 @@ ++============= ++BPF licensing ++============= ++ ++Background ++========== ++ ++* Classic BPF was BSD licensed ++ ++"BPF" was originally introduced as BSD Packet Filter in ++http://www.tcpdump.org/papers/bpf-usenix93.pdf. The corresponding instruction ++set and its implementation came from BSD with BSD license. That original ++instruction set is now known as "classic BPF". ++ ++However an instruction set is a specification for machine-language interaction, ++similar to a programming language. It is not a code. Therefore, the ++application of a BSD license may be misleading in a certain context, as the ++instruction set may enjoy no copyright protection. ++ ++* eBPF (extended BPF) instruction set continues to be BSD ++ ++In 2014, the classic BPF instruction set was significantly extended. We ++typically refer to this instruction set as eBPF to disambiguate it from cBPF. ++The eBPF instruction set is still BSD licensed. ++ ++Implementations of eBPF ++======================= ++ ++Using the eBPF instruction set requires implementing code in both kernel space ++and user space. ++ ++In Linux Kernel ++--------------- ++ ++The reference implementations of the eBPF interpreter and various just-in-time ++compilers are part of Linux and are GPLv2 licensed. The implementation of ++eBPF helper functions is also GPLv2 licensed. Interpreters, JITs, helpers, ++and verifiers are called eBPF runtime. 
++ ++In User Space ++------------- ++ ++There are also implementations of eBPF runtime (interpreter, JITs, helper ++functions) under ++Apache2 (https://github.com/iovisor/ubpf), ++MIT (https://github.com/qmonnet/rbpf), and ++BSD (https://github.com/DPDK/dpdk/blob/main/lib/librte_bpf). ++ ++In HW ++----- ++ ++The HW can choose to execute eBPF instruction natively and provide eBPF runtime ++in HW or via the use of implementing firmware with a proprietary license. ++ ++In other operating systems ++-------------------------- ++ ++Other kernels or user space implementations of eBPF instruction set and runtime ++can have proprietary licenses. ++ ++Using BPF programs in the Linux kernel ++====================================== ++ ++Linux Kernel (while being GPLv2) allows linking of proprietary kernel modules ++under these rules: ++Documentation/process/license-rules.rst ++ ++When a kernel module is loaded, the linux kernel checks which functions it ++intends to use. If any function is marked as "GPL only," the corresponding ++module or program has to have GPL compatible license. ++ ++Loading BPF program into the Linux kernel is similar to loading a kernel ++module. BPF is loaded at run time and not statically linked to the Linux ++kernel. BPF program loading follows the same license checking rules as kernel ++modules. BPF programs can be proprietary if they don't use "GPL only" BPF ++helper functions. ++ ++Further, some BPF program types - Linux Security Modules (LSM) and TCP ++Congestion Control (struct_ops), as of Aug 2021 - are required to be GPL ++compatible even if they don't use "GPL only" helper functions directly. The ++registration step of LSM and TCP congestion control modules of the Linux ++kernel is done through EXPORT_SYMBOL_GPL kernel functions. In that sense LSM ++and struct_ops BPF programs are implicitly calling "GPL only" functions. ++The same restriction applies to BPF programs that call kernel functions ++directly via unstable interface also known as "kfunc". ++ ++Packaging BPF programs with user space applications ++==================================================== ++ ++Generally, proprietary-licensed applications and GPL licensed BPF programs ++written for the Linux kernel in the same package can co-exist because they are ++separate executable processes. This applies to both cBPF and eBPF programs. +--- a/Documentation/bpf/index.rst ++++ b/Documentation/bpf/index.rst +@@ -82,6 +82,15 @@ Testing and debugging BPF + s390 + + ++Licensing ++========= ++ ++.. toctree:: ++ :maxdepth: 1 ++ ++ bpf_licensing ++ ++ + Other + ===== + diff --git a/patches.suse/bpf-Don-t-promote-bogus-looking-registers-after-null.patch b/patches.suse/bpf-Don-t-promote-bogus-looking-registers-after-null.patch new file mode 100644 index 0000000..5aca8ab --- /dev/null +++ b/patches.suse/bpf-Don-t-promote-bogus-looking-registers-after-null.patch @@ -0,0 +1,46 @@ +From: Daniel Borkmann +Date: Wed, 5 Jan 2022 11:35:13 -0800 +Subject: bpf: Don't promote bogus looking registers after null check. +Patch-mainline: v5.17-rc1 +Git-commit: e60b0d12a95dcf16a63225cead4541567f5cb517 +References: jsc#PED-1368 + +If we ever get to a point again where we convert a bogus looking _or_null +typed register containing a non-zero fixed or variable offset, then lets not +reset these bounds to zero since they are not and also don't promote the register +to a type, but instead leave it as _or_null. 
Converting to an unknown +register could be an avenue as well, but then if we run into this case it would +allow a kernel pointer to be leaked this way. + +Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") +Signed-off-by: Daniel Borkmann +Signed-off-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/verifier.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -9083,15 +9083,15 @@ static void mark_ptr_or_null_reg(struct + { + if (type_may_be_null(reg->type) && reg->id == id && + !WARN_ON_ONCE(!reg->id)) { +- /* Old offset (both fixed and variable parts) should +- * have been known-zero, because we don't allow pointer +- * arithmetic on pointers that might be NULL. +- */ + if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || + !tnum_equals_const(reg->var_off, 0) || + reg->off)) { +- __mark_reg_known_zero(reg); +- reg->off = 0; ++ /* Old offset (both fixed and variable parts) should ++ * have been known-zero, because we don't allow pointer ++ * arithmetic on pointers that might be NULL. If we ++ * see this happening, don't convert the register. ++ */ ++ return; + } + if (is_null) { + reg->type = SCALAR_VALUE; diff --git a/patches.suse/bpf-Emit-bpf_timer-in-vmlinux-BTF.patch b/patches.suse/bpf-Emit-bpf_timer-in-vmlinux-BTF.patch new file mode 100644 index 0000000..240afa7 --- /dev/null +++ b/patches.suse/bpf-Emit-bpf_timer-in-vmlinux-BTF.patch @@ -0,0 +1,44 @@ +From: Yonghong Song +Date: Fri, 11 Feb 2022 11:49:48 -0800 +Subject: bpf: Emit bpf_timer in vmlinux BTF +Patch-mainline: v5.17-rc6 +Git-commit: 3bd916ee0ecbbdd902fc24845f2fef332b2a310c +References: jsc#PED-1368 + +Currently the following code in check_and_init_map_value() + *(struct bpf_timer *)(dst + map->timer_off) = + (struct bpf_timer){}; +can help generate the bpf_timer definition in vmlinux BTF. +But the code above may not zero the whole structure +due to anonymous members, and that code will be replaced +by memset in the subsequent patch, so the +bpf_timer definition will disappear from vmlinux BTF. +Let us emit the type explicitly so BPF programs can continue +to use it from vmlinux.h.
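+
+As a minimal sketch of a consumer (the map value layout here is
+hypothetical, for illustration only), a BPF object built against
+vmlinux.h keeps compiling because the type stays in vmlinux BTF:
+
+        #include "vmlinux.h"
+
+        struct map_elem {
+                int counter;
+                struct bpf_timer timer; /* definition comes from vmlinux BTF */
+        };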
+ +Signed-off-by: Yonghong Song +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220211194948.3141529-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/helpers.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/kernel/bpf/helpers.c ++++ b/kernel/bpf/helpers.c +@@ -2,6 +2,7 @@ + /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + */ + #include ++#include + #include + #include + #include +@@ -1075,6 +1076,7 @@ static enum hrtimer_restart bpf_timer_cb + void *key; + u32 idx; + ++ BTF_TYPE_EMIT(struct bpf_timer); + callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held()); + if (!callback_fn) + goto out; diff --git a/patches.suse/bpf-Extend-BTF_ID_LIST_GLOBAL-with-parameter-for-num.patch b/patches.suse/bpf-Extend-BTF_ID_LIST_GLOBAL-with-parameter-for-num.patch new file mode 100644 index 0000000..b08c0f5 --- /dev/null +++ b/patches.suse/bpf-Extend-BTF_ID_LIST_GLOBAL-with-parameter-for-num.patch @@ -0,0 +1,109 @@ +From: Song Liu +Date: Fri, 12 Nov 2021 07:02:42 -0800 +Subject: bpf: Extend BTF_ID_LIST_GLOBAL with parameter for number of IDs +Patch-mainline: v5.17-rc1 +Git-commit: 9e2ad638ae3632ef916ceb39f70e3104bf8fdc97 +References: jsc#PED-1368 + +syzbot reported the following BUG w/o CONFIG_DEBUG_INFO_BTF + +BUG: KASAN: global-out-of-bounds in task_iter_init+0x212/0x2e7 kernel/bpf/task_iter.c:661 +Read of size 4 at addr ffffffff90297404 by task swapper/0/1 + +CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.15.0-syzkaller #0 +Hardware name: ... Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + +__dump_stack lib/dump_stack.c:88 [inline] +dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 +print_address_description.constprop.0.cold+0xf/0x309 mm/kasan/report.c:256 +__kasan_report mm/kasan/report.c:442 [inline] +kasan_report.cold+0x83/0xdf mm/kasan/report.c:459 +task_iter_init+0x212/0x2e7 kernel/bpf/task_iter.c:661 +do_one_initcall+0x103/0x650 init/main.c:1295 +do_initcall_level init/main.c:1368 [inline] +do_initcalls init/main.c:1384 [inline] +do_basic_setup init/main.c:1403 [inline] +kernel_init_freeable+0x6b1/0x73a init/main.c:1606 +kernel_init+0x1a/0x1d0 init/main.c:1497 +ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 + + +This is caused by hard-coded name[1] in BTF_ID_LIST_GLOBAL (w/o +CONFIG_DEBUG_INFO_BTF). Fix this by adding a parameter n to +BTF_ID_LIST_GLOBAL. This avoids ifdef CONFIG_DEBUG_INFO_BTF in btf.c and +filter.c. 
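+
+A sketch of the expansion without CONFIG_DEBUG_INFO_BTF, mirroring the
+hunk below:
+
+        /* before: always a single element, no matter how many IDs follow */
+        #define BTF_ID_LIST_GLOBAL(name) u32 name[1];
+
+        /* after: the caller states the element count */
+        #define BTF_ID_LIST_GLOBAL(name, n) u32 name[n];
+
+        BTF_ID_LIST_GLOBAL(btf_task_struct_ids, 3) /* task_struct, file, vm_area_struct */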
+ +Fixes: 7c7e3d31e785 ("bpf: Introduce helper bpf_find_vma") +Reported-by: syzbot+e0d81ec552a21d9071aa@syzkaller.appspotmail.com +Reported-by: Eric Dumazet +Suggested-by: Eric Dumazet +Signed-off-by: Song Liu +Signed-off-by: Alexei Starovoitov +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20211112150243.1270987-2-songliubraving@fb.com +Acked-by: Shung-Hsi Yu +--- + include/linux/btf_ids.h | 6 +++--- + kernel/bpf/btf.c | 2 +- + net/core/filter.c | 6 +----- + 3 files changed, 5 insertions(+), 9 deletions(-) + +--- a/include/linux/btf_ids.h ++++ b/include/linux/btf_ids.h +@@ -73,7 +73,7 @@ asm( \ + __BTF_ID_LIST(name, local) \ + extern u32 name[]; + +-#define BTF_ID_LIST_GLOBAL(name) \ ++#define BTF_ID_LIST_GLOBAL(name, n) \ + __BTF_ID_LIST(name, globl) + + /* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with +@@ -83,7 +83,7 @@ __BTF_ID_LIST(name, globl) + BTF_ID_LIST(name) \ + BTF_ID(prefix, typename) + #define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) \ +- BTF_ID_LIST_GLOBAL(name) \ ++ BTF_ID_LIST_GLOBAL(name, 1) \ + BTF_ID(prefix, typename) + + /* +@@ -149,7 +149,7 @@ extern struct btf_id_set name; + #define BTF_ID_LIST(name) static u32 name[5]; + #define BTF_ID(prefix, name) + #define BTF_ID_UNUSED +-#define BTF_ID_LIST_GLOBAL(name) u32 name[1]; ++#define BTF_ID_LIST_GLOBAL(name, n) u32 name[n]; + #define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1]; + #define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1]; + #define BTF_SET_START(name) static struct btf_id_set name = { 0 }; +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -6354,7 +6354,7 @@ const struct bpf_func_proto bpf_btf_find + .arg4_type = ARG_ANYTHING, + }; + +-BTF_ID_LIST_GLOBAL(btf_task_struct_ids) ++BTF_ID_LIST_GLOBAL(btf_task_struct_ids, 3) + BTF_ID(struct, task_struct) + BTF_ID(struct, file) + BTF_ID(struct, vm_area_struct) +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -10680,14 +10680,10 @@ void bpf_prog_change_xdp(struct bpf_prog + bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog); + } + +-#ifdef CONFIG_DEBUG_INFO_BTF +-BTF_ID_LIST_GLOBAL(btf_sock_ids) ++BTF_ID_LIST_GLOBAL(btf_sock_ids, MAX_BTF_SOCK_TYPE) + #define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type) + BTF_SOCK_TYPE_xxx + #undef BTF_SOCK_TYPE +-#else +-u32 btf_sock_ids[MAX_BTF_SOCK_TYPE]; +-#endif + + BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk) + { diff --git a/patches.suse/bpf-Extend-kfunc-with-PTR_TO_CTX-PTR_TO_MEM-argument.patch b/patches.suse/bpf-Extend-kfunc-with-PTR_TO_CTX-PTR_TO_MEM-argument.patch new file mode 100644 index 0000000..f753289 --- /dev/null +++ b/patches.suse/bpf-Extend-kfunc-with-PTR_TO_CTX-PTR_TO_MEM-argument.patch @@ -0,0 +1,187 @@ +From: Kumar Kartikeya Dwivedi +Date: Fri, 17 Dec 2021 07:20:24 +0530 +Subject: bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support +Patch-mainline: v5.17-rc1 +Git-commit: 3363bd0cfbb80dfcd25003cd3815b0ad8b68d0ff +References: jsc#PED-1368 + +Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type, +and punt to PTR_TO_MEM block if reg->type does not fall in one of +PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits +to get access to XDP and TC PTR_TO_CTX, and pass various data (flags, +l4proto, netns_id, etc.) encoded in opts struct passed as pointer to +kfunc. + +For PTR_TO_MEM support, arguments are currently limited to pointer to +scalar, or pointer to struct composed of scalars. 
This is done so that +unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of +a valid in-kernel structure is expected, which may have pointers) are +avoided. Since the argument checking happens based on the argument register +type, it is not easy to ascertain what the expected type is. In the +future, support for PTR_TO_MEM for kfunc can be extended to serve other +use cases. The struct type whose pointer is passed in may have a maximum +nesting depth of 4, all recursively composed of scalars or structs with +scalars. + +Future commits will add negative tests that check whether these +restrictions imposed for kfunc arguments are duly rejected by the BPF +verifier or not. + +Signed-off-by: Kumar Kartikeya Dwivedi +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/btf.c | 94 ++++++++++++++++++++++++++++++++++++++++++------------- + 1 file changed, 73 insertions(+), 21 deletions(-) + +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -5576,12 +5576,53 @@ static u32 *reg2btf_ids[__BPF_REG_TYPE_M + #endif + }; + ++/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */ ++static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log, ++ const struct btf *btf, ++ const struct btf_type *t, int rec) ++{ ++ const struct btf_type *member_type; ++ const struct btf_member *member; ++ u32 i; ++ ++ if (!btf_type_is_struct(t)) ++ return false; ++ ++ for_each_member(i, t, member) { ++ const struct btf_array *array; ++ ++ member_type = btf_type_skip_modifiers(btf, member->type, NULL); ++ if (btf_type_is_struct(member_type)) { ++ if (rec >= 3) { ++ bpf_log(log, "max struct nesting depth exceeded\n"); ++ return false; ++ } ++ if (!__btf_type_is_scalar_struct(log, btf, member_type, rec + 1)) ++ return false; ++ continue; ++ } ++ if (btf_type_is_array(member_type)) { ++ array = btf_type_array(member_type); ++ if (!array->nelems) ++ return false; ++ member_type = btf_type_skip_modifiers(btf, array->type, NULL); ++ if (!btf_type_is_scalar(member_type)) ++ return false; ++ continue; ++ } ++ if (!btf_type_is_scalar(member_type)) ++ return false; ++ } ++ return true; ++} ++ + static int btf_check_func_arg_match(struct bpf_verifier_env *env, + const struct btf *btf, u32 func_id, + struct bpf_reg_state *regs, + bool ptr_to_mem_ok) + { + struct bpf_verifier_log *log = &env->log; ++ bool is_kfunc = btf_is_kernel(btf); + const char *func_name, *ref_tname; + const struct btf_type *t, *ref_t; + const struct btf_param *args; +@@ -5634,7 +5675,20 @@ static int btf_check_func_arg_match(stru + + ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); + ref_tname = btf_name_by_offset(btf, ref_t->name_off); +- if (btf_is_kernel(btf)) { ++ if (btf_get_prog_ctx_type(log, btf, t, ++ env->prog->type, i)) { ++ /* If function expects ctx type in BTF check that caller ++ * is passing PTR_TO_CTX.
++ */ ++ if (reg->type != PTR_TO_CTX) { ++ bpf_log(log, ++ "arg#%d expected pointer to ctx, but got %s\n", ++ i, btf_type_str(t)); ++ return -EINVAL; ++ } ++ if (check_ctx_reg(env, reg, regno)) ++ return -EINVAL; ++ } else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || reg2btf_ids[reg->type])) { + const struct btf_type *reg_ref_t; + const struct btf *reg_btf; + const char *reg_ref_tname; +@@ -5650,14 +5704,9 @@ static int btf_check_func_arg_match(stru + if (reg->type == PTR_TO_BTF_ID) { + reg_btf = reg->btf; + reg_ref_id = reg->btf_id; +- } else if (reg2btf_ids[reg->type]) { ++ } else { + reg_btf = btf_vmlinux; + reg_ref_id = *reg2btf_ids[reg->type]; +- } else { +- bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d is not a pointer to btf_id\n", +- func_name, i, +- btf_type_str(ref_t), ref_tname, regno); +- return -EINVAL; + } + + reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, +@@ -5673,23 +5722,24 @@ static int btf_check_func_arg_match(stru + reg_ref_tname); + return -EINVAL; + } +- } else if (btf_get_prog_ctx_type(log, btf, t, +- env->prog->type, i)) { +- /* If function expects ctx type in BTF check that caller +- * is passing PTR_TO_CTX. +- */ +- if (reg->type != PTR_TO_CTX) { +- bpf_log(log, +- "arg#%d expected pointer to ctx, but got %s\n", +- i, btf_type_str(t)); +- return -EINVAL; +- } +- if (check_ctx_reg(env, reg, regno)) +- return -EINVAL; + } else if (ptr_to_mem_ok) { + const struct btf_type *resolve_ret; + u32 type_size; + ++ if (is_kfunc) { ++ /* Permit pointer to mem, but only when argument ++ * type is pointer to scalar, or struct composed ++ * (recursively) of scalars. ++ */ ++ if (!btf_type_is_scalar(ref_t) && ++ !__btf_type_is_scalar_struct(log, btf, ref_t, 0)) { ++ bpf_log(log, ++ "arg#%d pointer type %s %s must point to scalar or struct with scalar\n", ++ i, btf_type_str(ref_t), ref_tname); ++ return -EINVAL; ++ } ++ } ++ + resolve_ret = btf_resolve_size(btf, ref_t, &type_size); + if (IS_ERR(resolve_ret)) { + bpf_log(log, +@@ -5702,6 +5752,8 @@ static int btf_check_func_arg_match(stru + if (check_mem_reg(env, reg, regno, type_size)) + return -EINVAL; + } else { ++ bpf_log(log, "reg type unsupported for arg#%d %sfunction %s#%d\n", i, ++ is_kfunc ? "kernel " : "", func_name, func_id); + return -EINVAL; + } + } +@@ -5751,7 +5803,7 @@ int btf_check_kfunc_arg_match(struct bpf + const struct btf *btf, u32 func_id, + struct bpf_reg_state *regs) + { +- return btf_check_func_arg_match(env, btf, func_id, regs, false); ++ return btf_check_func_arg_match(env, btf, func_id, regs, true); + } + + /* Convert BTF of a function into bpf_reg_state if possible diff --git a/patches.suse/bpf-Fix-PTR_TO_BTF_ID-var_off-check.patch b/patches.suse/bpf-Fix-PTR_TO_BTF_ID-var_off-check.patch index bc303d5..c9f5237 100644 --- a/patches.suse/bpf-Fix-PTR_TO_BTF_ID-var_off-check.patch +++ b/patches.suse/bpf-Fix-PTR_TO_BTF_ID-var_off-check.patch @@ -6,7 +6,6 @@ Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux Git-commit: 655efe5089f077485eec848272bd7e26b1a5a735 References: git-fixes X-Info: adjusted context, missing "bpf: Add reference tracking support to kfunc" 5c073f26f9dc78a6c8194b23eac7537c9692c7d7 -X-Info: adjusted context, missing "bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support" 3363bd0cfbb80dfcd25003cd3815b0ad8b68d0ff When kfunc support was added, check_ctx_reg was called for PTR_TO_CTX register, but no offset checks were made for PTR_TO_BTF_ID. 
Only @@ -47,7 +46,7 @@ Acked-by: Shung-Hsi Yu --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c -@@ -5392,6 +5392,7 @@ static int btf_check_func_arg_match(stru +@@ -5627,6 +5627,7 @@ static int btf_check_func_arg_match(stru const struct btf_type *t, *ref_t; const struct btf_param *args; u32 i, nargs, ref_id; @@ -55,7 +54,7 @@ Acked-by: Shung-Hsi Yu t = btf_type_by_id(btf, func_id); if (!t || !btf_type_is_func(t)) { -@@ -5440,6 +5441,11 @@ static int btf_check_func_arg_match(stru +@@ -5675,6 +5676,11 @@ static int btf_check_func_arg_match(stru ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); ref_tname = btf_name_by_offset(btf, ref_t->name_off); @@ -64,15 +63,15 @@ Acked-by: Shung-Hsi Yu + if (ret < 0) + return ret; + - if (btf_is_kernel(btf)) { - const struct btf_type *reg_ref_t; - const struct btf *reg_btf; -@@ -5490,8 +5496,6 @@ static int btf_check_func_arg_match(stru + if (btf_get_prog_ctx_type(log, btf, t, + env->prog->type, i)) { + /* If function expects ctx type in BTF check that caller +@@ -5686,8 +5692,6 @@ static int btf_check_func_arg_match(stru i, btf_type_str(t)); return -EINVAL; } - if (check_ptr_off_reg(env, reg, regno)) - return -EINVAL; - } else if (ptr_to_mem_ok) { - const struct btf_type *resolve_ret; - u32 type_size; + } else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || + (reg2btf_ids[base_type(reg->type)] && !type_flag(reg->type)))) { + const struct btf_type *reg_ref_t; diff --git a/patches.suse/bpf-Fix-SO_RCVBUF-SO_SNDBUF-handling-in-_bpf_setsock.patch b/patches.suse/bpf-Fix-SO_RCVBUF-SO_SNDBUF-handling-in-_bpf_setsock.patch new file mode 100644 index 0000000..93853bd --- /dev/null +++ b/patches.suse/bpf-Fix-SO_RCVBUF-SO_SNDBUF-handling-in-_bpf_setsock.patch @@ -0,0 +1,39 @@ +From: Kuniyuki Iwashima +Date: Tue, 4 Jan 2022 10:31:48 +0900 +Subject: bpf: Fix SO_RCVBUF/SO_SNDBUF handling in _bpf_setsockopt(). +Patch-mainline: v5.17-rc1 +Git-commit: 04c350b1ae6bdb12b84009a4d0bf5ab4e621c47b +References: jsc#PED-1368 + +The commit 4057765f2dee ("sock: consistent handling of extreme +SO_SNDBUF/SO_RCVBUF values") added a change to prevent underflow +in setsockopt() around SO_SNDBUF/SO_RCVBUF. + +This patch adds the same change to _bpf_setsockopt(). 
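+
+For illustration, without the clamp a large value doubles into a
+negative buffer size, since sk_rcvbuf and sk_sndbuf are plain ints
+(two's-complement wrap shown):
+
+        int val = INT_MAX;      /* e.g. an extreme value from the program */
+        int buf = val * 2;      /* wraps to -2 */
+
+Clamping val to INT_MAX / 2 keeps val * 2 within the positive int range.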
+ +Fixes: 4057765f2dee ("sock: consistent handling of extreme SO_SNDBUF/SO_RCVBUF values") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220104013153.97906-2-kuniyu@amazon.co.jp +Acked-by: Shung-Hsi Yu +--- + net/core/filter.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -4758,12 +4758,14 @@ static int _bpf_setsockopt(struct sock * + switch (optname) { + case SO_RCVBUF: + val = min_t(u32, val, sysctl_rmem_max); ++ val = min_t(int, val, INT_MAX / 2); + sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + WRITE_ONCE(sk->sk_rcvbuf, + max_t(int, val * 2, SOCK_MIN_RCVBUF)); + break; + case SO_SNDBUF: + val = min_t(u32, val, sysctl_wmem_max); ++ val = min_t(int, val, INT_MAX / 2); + sk->sk_userlocks |= SOCK_SNDBUF_LOCK; + WRITE_ONCE(sk->sk_sndbuf, + max_t(int, val * 2, SOCK_MIN_SNDBUF)); diff --git a/patches.suse/bpf-Fix-a-bpf_timer-initialization-issue.patch b/patches.suse/bpf-Fix-a-bpf_timer-initialization-issue.patch new file mode 100644 index 0000000..5cbf5f8 --- /dev/null +++ b/patches.suse/bpf-Fix-a-bpf_timer-initialization-issue.patch @@ -0,0 +1,111 @@ +From: Yonghong Song +Date: Fri, 11 Feb 2022 11:49:53 -0800 +Subject: bpf: Fix a bpf_timer initialization issue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc6 +Git-commit: 5eaed6eedbe9612f642ad2b880f961d1c6c8ec2b +References: jsc#PED-1368 + +The patch in [1] intends to fix a bpf_timer related issue, +but the fix caused existing 'timer' selftest to fail with +hang or some random errors. After some debug, I found +an issue with check_and_init_map_value() in the hashtab.c. +More specifically, in hashtab.c, we have code + l_new = bpf_map_kmalloc_node(&htab->map, ...) + check_and_init_map_value(&htab->map, l_new...) +Note that bpf_map_kmalloc_node() does not do initialization +so l_new contains random value. + +The function check_and_init_map_value() intends to zero the +bpf_spin_lock and bpf_timer if they exist in the map. +But I found bpf_spin_lock is zero'ed but bpf_timer is not zero'ed. +With [1], later copy_map_value() skips copying of +bpf_spin_lock and bpf_timer. The non-zero bpf_timer caused +random failures for 'timer' selftest. +Without [1], for both bpf_spin_lock and bpf_timer case, +bpf_timer will be zero'ed, so 'timer' self test is okay. + +For check_and_init_map_value(), why bpf_spin_lock is zero'ed +properly while bpf_timer not. In bpf uapi header, we have + struct bpf_spin_lock { + __u32 val; + }; + struct bpf_timer { + __u64 :64; + __u64 :64; + } __attribute__((aligned(8))); + +The initialization code: + *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = + (struct bpf_spin_lock){}; + *(struct bpf_timer *)(dst + map->timer_off) = + (struct bpf_timer){}; +It appears the compiler has no obligation to initialize anonymous fields. +For example, let us use clang with bpf target as below: + $ cat t.c + struct bpf_timer { + unsigned long long :64; + }; + struct bpf_timer2 { + unsigned long long a; + }; + + void test(struct bpf_timer *t) { + *t = (struct bpf_timer){}; + } + void test2(struct bpf_timer2 *t) { + *t = (struct bpf_timer2){}; + } + $ clang -target bpf -O2 -c -g t.c + $ llvm-objdump -d t.o + ... + 0000000000000000 : + 0: 95 00 00 00 00 00 00 00 exit + 0000000000000008 : + 1: b7 02 00 00 00 00 00 00 r2 = 0 + 2: 7b 21 00 00 00 00 00 00 *(u64 *)(r1 + 0) = r2 + 3: 95 00 00 00 00 00 00 00 exit + +gcc11.2 does not have the above issue. 
But from + INTERNATIONAL STANDARD ©ISO/IEC ISO/IEC 9899:201x + Programming languages — C + http://www.open-std.org/Jtc1/sc22/wg14/www/docs/n1547.pdf + page 157: + Except where explicitly stated otherwise, for the purposes of + this subclause unnamed members of objects of structure and union + type do not participate in initialization. Unnamed members of + structure objects have indeterminate value even after initialization. + +To fix the problem, let us use memset for the bpf_timer case in +check_and_init_map_value(). For consistency, memset is also +used for the bpf_spin_lock case. + + [1] https://lore.kernel.org/bpf/20220209070324.1093182-2-memxor@gmail.com/ + +Fixes: 68134668c17f3 ("bpf: Add map side support for bpf timers.") +Signed-off-by: Yonghong Song +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220211194953.3142152-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + include/linux/bpf.h | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -209,11 +209,9 @@ static inline bool map_value_has_timer(c + static inline void check_and_init_map_value(struct bpf_map *map, void *dst) + { + if (unlikely(map_value_has_spin_lock(map))) +- *(struct bpf_spin_lock *)(dst + map->spin_lock_off) = +- (struct bpf_spin_lock){}; ++ memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock)); + if (unlikely(map_value_has_timer(map))) +- *(struct bpf_timer *)(dst + map->timer_off) = +- (struct bpf_timer){}; ++ memset(dst + map->timer_off, 0, sizeof(struct bpf_timer)); + } + + /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */ diff --git a/patches.suse/bpf-Fix-crash-due-to-incorrect-copy_map_value.patch b/patches.suse/bpf-Fix-crash-due-to-incorrect-copy_map_value.patch new file mode 100644 index 0000000..b675b3a --- /dev/null +++ b/patches.suse/bpf-Fix-crash-due-to-incorrect-copy_map_value.patch @@ -0,0 +1,82 @@ +From: Kumar Kartikeya Dwivedi +Date: Wed, 9 Feb 2022 12:33:23 +0530 +Subject: bpf: Fix crash due to incorrect copy_map_value +Patch-mainline: v5.17-rc6 +Git-commit: a8abb0c3dc1e28454851a00f8b7333d9695d566c +References: jsc#PED-1368 + +When both bpf_spin_lock and bpf_timer are present in a BPF map value, +copy_map_value needs to skirt both objects when copying a value into and +out of the map. However, the current code does not set both s_off and +t_off in copy_map_value, which leads to a crash when e.g. bpf_spin_lock +is placed in map value with bpf_timer, as a bpf_map_update_elem call will +be able to overwrite the other timer object. + +When the issue is not fixed, such an overwrite can produce the following +splat: + +[root@(none) bpf]# ./test_progs -t timer_crash +[ 15.930339] bpf_testmod: loading out-of-tree module taints kernel. +[ 16.037849] ================================================================== +[ 16.038458] BUG: KASAN: user-memory-access in __pv_queued_spin_lock_slowpath+0x32b/0x520 +[ 16.038944] Write of size 8 at addr 0000000000043ec0 by task test_progs/325 +[ 16.039399] +[ 16.039514] CPU: 0 PID: 325 Comm: test_progs Tainted: G OE 5.16.0+ #278 +[ 16.039983] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.15.0-1 04/01/2014 +[ 16.040485] Call Trace: +[ 16.040645] +[ 16.040805] dump_stack_lvl+0x59/0x73 +[ 16.041069] ? __pv_queued_spin_lock_slowpath+0x32b/0x520 +[ 16.041427] kasan_report.cold+0x116/0x11b +[ 16.041673] ? __pv_queued_spin_lock_slowpath+0x32b/0x520 +[ 16.042040] __pv_queued_spin_lock_slowpath+0x32b/0x520 +[ 16.042328] ?
memcpy+0x39/0x60 +[ 16.042552] ? pv_hash+0xd0/0xd0 +[ 16.042785] ? lockdep_hardirqs_off+0x95/0xd0 +[ 16.043079] __bpf_spin_lock_irqsave+0xdf/0xf0 +[ 16.043366] ? bpf_get_current_comm+0x50/0x50 +[ 16.043608] ? jhash+0x11a/0x270 +[ 16.043848] bpf_timer_cancel+0x34/0xe0 +[ 16.044119] bpf_prog_c4ea1c0f7449940d_sys_enter+0x7c/0x81 +[ 16.044500] bpf_trampoline_6442477838_0+0x36/0x1000 +[ 16.044836] __x64_sys_nanosleep+0x5/0x140 +[ 16.045119] do_syscall_64+0x59/0x80 +[ 16.045377] ? lock_is_held_type+0xe4/0x140 +[ 16.045670] ? irqentry_exit_to_user_mode+0xa/0x40 +[ 16.046001] ? mark_held_locks+0x24/0x90 +[ 16.046287] ? asm_exc_page_fault+0x1e/0x30 +[ 16.046569] ? asm_exc_page_fault+0x8/0x30 +[ 16.046851] ? lockdep_hardirqs_on+0x7e/0x100 +[ 16.047137] entry_SYSCALL_64_after_hwframe+0x44/0xae +[ 16.047405] RIP: 0033:0x7f9e4831718d +[ 16.047602] Code: b4 0c 00 0f 05 eb a9 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d b3 6c 0c 00 f7 d8 64 89 01 48 +[ 16.048764] RSP: 002b:00007fff488086b8 EFLAGS: 00000206 ORIG_RAX: 0000000000000023 +[ 16.049275] RAX: ffffffffffffffda RBX: 00007f9e48683740 RCX: 00007f9e4831718d +[ 16.049747] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00007fff488086d0 +[ 16.050225] RBP: 00007fff488086f0 R08: 00007fff488085d7 R09: 00007f9e4cb594a0 +[ 16.050648] R10: 0000000000000000 R11: 0000000000000206 R12: 00007f9e484cde30 +[ 16.051124] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 +[ 16.051608] +[ 16.051762] ================================================================== + +Fixes: 68134668c17f ("bpf: Add map side support for bpf timers.") +Signed-off-by: Kumar Kartikeya Dwivedi +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220209070324.1093182-2-memxor@gmail.com +Acked-by: Shung-Hsi Yu +--- + include/linux/bpf.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -224,7 +224,8 @@ static inline void copy_map_value(struct + if (unlikely(map_value_has_spin_lock(map))) { + s_off = map->spin_lock_off; + s_sz = sizeof(struct bpf_spin_lock); +- } else if (unlikely(map_value_has_timer(map))) { ++ } ++ if (unlikely(map_value_has_timer(map))) { + t_off = map->timer_off; + t_sz = sizeof(struct bpf_timer); + } diff --git a/patches.suse/bpf-Fix-crash-due-to-out-of-bounds-access-into-reg2b.patch b/patches.suse/bpf-Fix-crash-due-to-out-of-bounds-access-into-reg2b.patch index 81a63c3..aee15e5 100644 --- a/patches.suse/bpf-Fix-crash-due-to-out-of-bounds-access-into-reg2b.patch +++ b/patches.suse/bpf-Fix-crash-due-to-out-of-bounds-access-into-reg2b.patch @@ -22,20 +22,27 @@ Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220216201943.624869-1-memxor@gmail.com Acked-by: Shung-Hsi Yu --- - kernel/bpf/btf.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) + kernel/bpf/btf.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c -@@ -5456,9 +5456,9 @@ static int btf_check_func_arg_match(stru - if (reg->type == PTR_TO_BTF_ID) { - reg_btf = reg->btf; +@@ -5688,7 +5688,8 @@ static int btf_check_func_arg_match(stru + } + if (check_ptr_off_reg(env, reg, regno)) + return -EINVAL; +- } else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || reg2btf_ids[reg->type])) { ++ } else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || ++ (reg2btf_ids[base_type(reg->type)] && !type_flag(reg->type)))) { + const struct btf_type 
*reg_ref_t; + const struct btf *reg_btf; + const char *reg_ref_tname; @@ -5706,7 +5707,7 @@ static int btf_check_func_arg_match(stru reg_ref_id = reg->btf_id; + } else { reg_btf = btf_vmlinux; - reg_ref_id = *reg2btf_ids[reg->type]; + reg_ref_id = *reg2btf_ids[base_type(reg->type)]; } + + reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, diff --git a/patches.suse/bpf-Fix-incorrect-integer-literal-used-for-marking-s.patch b/patches.suse/bpf-Fix-incorrect-integer-literal-used-for-marking-s.patch new file mode 100644 index 0000000..66cfe2e --- /dev/null +++ b/patches.suse/bpf-Fix-incorrect-integer-literal-used-for-marking-s.patch @@ -0,0 +1,49 @@ +From: Christy Lee +Date: Fri, 7 Jan 2022 16:58:54 -0800 +Subject: bpf: Fix incorrect integer literal used for marking scratched stack. +Patch-mainline: v5.17-rc1 +Git-commit: 343e53754b21ae45530623222aa079fecd3cf942 +References: jsc#PED-1368 + +env->scratched_stack_slots is a 64-bit value, so we should use ULL +instead of UL literal values. + +Reported-by: kernel test robot +Reported-by: Dan Carpenter +Signed-off-by: Christy Lee +Acked-by: Song Liu +Link: https://lore.kernel.org/r/20220108005854.658596-1-christylee@fb.com +Signed-off-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/verifier.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -616,7 +616,7 @@ static void mark_reg_scratched(struct bp + + static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi) + { +- env->scratched_stack_slots |= 1UL << spi; ++ env->scratched_stack_slots |= 1ULL << spi; + } + + static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno) +@@ -637,14 +637,14 @@ static bool verifier_state_scratched(con + static void mark_verifier_state_clean(struct bpf_verifier_env *env) + { + env->scratched_regs = 0U; +- env->scratched_stack_slots = 0UL; ++ env->scratched_stack_slots = 0ULL; + } + + /* Used for printing the entire verifier state. */ + static void mark_verifier_state_scratched(struct bpf_verifier_env *env) + { + env->scratched_regs = ~0U; +- env->scratched_stack_slots = ~0UL; ++ env->scratched_stack_slots = ~0ULL; + } + + /* The reg state of a pointer or a bounded scalar was saved when diff --git a/patches.suse/bpf-Fix-mount-source-show-for-bpffs.patch b/patches.suse/bpf-Fix-mount-source-show-for-bpffs.patch new file mode 100644 index 0000000..243cc51 --- /dev/null +++ b/patches.suse/bpf-Fix-mount-source-show-for-bpffs.patch @@ -0,0 +1,78 @@ +From: Yafang Shao +Date: Sat, 8 Jan 2022 13:46:23 +0000 +Subject: bpf: Fix mount source show for bpffs +Patch-mainline: v5.17-rc1 +Git-commit: 1e9d74660d4df625b0889e77018f9e94727ceacd +References: jsc#PED-1368 + +We noticed our tc ebpf tools can't start after upgrading our in-house kernel +version from 4.19 to 5.10. That is because of the behaviour change in bpffs +caused by commit d2935de7e4fd ("vfs: Convert bpf to use the new mount API"). + +In our tc ebpf tools, we do a strict environment check. If the environment does +not match, we won't start the ebpf progs. One of the checks is whether +bpffs is properly mounted.
The mount information of bpffs in kernel-4.19 and
+kernel-5.10 is as follows:
+
+- kernel 4.19
+$ mount -t bpf bpffs /sys/fs/bpf
+$ mount -t bpf
+bpffs on /sys/fs/bpf type bpf (rw,relatime)
+
+- kernel 5.10
+$ mount -t bpf bpffs /sys/fs/bpf
+$ mount -t bpf
+none on /sys/fs/bpf type bpf (rw,relatime)
+
+The device name in kernel-5.10 is displayed as none instead of bpffs, so our
+environment check fails. Currently we modify the tools to adapt to the kernel
+behaviour change, but I think we'd better change the kernel code to keep the
+behavior consistent.
+
+After this change, the mount information will be displayed the same as the
+behavior in kernel-4.19, for example:
+
+$ mount -t bpf bpffs /sys/fs/bpf
+$ mount -t bpf
+bpffs on /sys/fs/bpf type bpf (rw,relatime)
+
+Fixes: d2935de7e4fd ("vfs: Convert bpf to use the new mount API")
+Suggested-by: Daniel Borkmann
+Signed-off-by: Yafang Shao
+Signed-off-by: Daniel Borkmann
+Acked-by: Christian Brauner
+Cc: David Howells
+Cc: Al Viro
+Link: https://lore.kernel.org/bpf/20220108134623.32467-1-laoar.shao@gmail.com
+Acked-by: Shung-Hsi Yu
+---
+ kernel/bpf/inode.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/kernel/bpf/inode.c
++++ b/kernel/bpf/inode.c
+@@ -648,12 +648,22 @@ static int bpf_parse_param(struct fs_con
+ int opt;
+
+ opt = fs_parse(fc, bpf_fs_parameters, param, &result);
+- if (opt < 0)
++ if (opt < 0) {
+ /* We might like to report bad mount options here, but
+ * traditionally we've ignored all mount options, so we'd
+ * better continue to ignore non-existing options for bpf.
+ */
+- return opt == -ENOPARAM ? 0 : opt;
++ if (opt == -ENOPARAM) {
++ opt = vfs_parse_fs_param_source(fc, param);
++ if (opt != -ENOPARAM)
++ return opt;
++
++ return 0;
++ }
++
++ if (opt < 0)
++ return opt;
++ }
+
+ switch (opt) {
+ case OPT_MODE:
diff --git a/patches.suse/bpf-Fix-possible-race-in-inc_misses_counter.patch b/patches.suse/bpf-Fix-possible-race-in-inc_misses_counter.patch
new file mode 100644
index 0000000..389eaaf
--- /dev/null
+++ b/patches.suse/bpf-Fix-possible-race-in-inc_misses_counter.patch
@@ -0,0 +1,40 @@
+From: He Fengqing
+Date: Sat, 22 Jan 2022 10:29:36 +0000
+Subject: bpf: Fix possible race in inc_misses_counter
+Patch-mainline: v5.17-rc3
+Git-commit: 0e3135d3bfa5dfb658145238d2bc723a8e30c3a3
+References: jsc#PED-1368
+
+It seems inc_misses_counter() suffers from the same issue fixed in
+commit d979617aa84d ("bpf: Fixes possible race in update_prog_stats()
+for 32bit arches"):
+As it can run while interrupts are enabled, it could
+be re-entered and the u64_stats syncp could be mangled.
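+
+A minimal sketch of the failure mode (illustrative, not part of the
+commit): on 32-bit arches the syncp embeds a real seqcount, so an irq
+firing inside the write section can re-enter it on the same CPU and
+leave the sequence count inconsistent:
+
+	u64_stats_update_begin(&stats->syncp);	/* seq becomes odd */
+	/* irq fires here; handler enters the same write section */
+	u64_stats_inc(&stats->misses);
+	u64_stats_update_end(&stats->syncp);	/* nested writer mangled seq */
+
+Saving and restoring the irq state around the section, as the hunk below
+does with u64_stats_update_begin_irqsave(), rules the re-entrancy out.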
+ +Fixes: 9ed9e9ba2337 ("bpf: Count the number of times recursion was prevented") +Signed-off-by: He Fengqing +Acked-by: John Fastabend +Link: https://lore.kernel.org/r/20220122102936.1219518-1-hefengqing@huawei.com +Signed-off-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/trampoline.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/kernel/bpf/trampoline.c ++++ b/kernel/bpf/trampoline.c +@@ -550,11 +550,12 @@ static __always_inline u64 notrace bpf_p + static void notrace inc_misses_counter(struct bpf_prog *prog) + { + struct bpf_prog_stats *stats; ++ unsigned int flags; + + stats = this_cpu_ptr(prog->stats); +- u64_stats_update_begin(&stats->syncp); ++ flags = u64_stats_update_begin_irqsave(&stats->syncp); + u64_stats_inc(&stats->misses); +- u64_stats_update_end(&stats->syncp); ++ u64_stats_update_end_irqrestore(&stats->syncp, flags); + } + + /* The logic is similar to bpf_prog_run(), but with an explicit diff --git a/patches.suse/bpf-Fix-renaming-task_getsecid_subj-current_getsecid.patch b/patches.suse/bpf-Fix-renaming-task_getsecid_subj-current_getsecid.patch new file mode 100644 index 0000000..75204d6 --- /dev/null +++ b/patches.suse/bpf-Fix-renaming-task_getsecid_subj-current_getsecid.patch @@ -0,0 +1,30 @@ +From: Alexei Starovoitov +Date: Mon, 24 Jan 2022 20:20:51 -0800 +Subject: bpf: Fix renaming task_getsecid_subj->current_getsecid_subj. +Patch-mainline: v5.17-rc3 +Git-commit: 63ee956f69d8c181e5251c7ce58b84c1edec0f6a +References: jsc#PED-1368 + +The commit 6326948f940d missed renaming of task->current LSM hook in BTF_ID. +Fix it to silence build warning: +WARN: resolve_btfids: unresolved symbol bpf_lsm_task_getsecid_subj + +Fixes: 6326948f940d ("lsm: security_task_getsecid_subj() -> security_current_getsecid_subj()") +Acked-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/bpf_lsm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/bpf/bpf_lsm.c ++++ b/kernel/bpf/bpf_lsm.c +@@ -207,7 +207,7 @@ BTF_ID(func, bpf_lsm_socket_socketpair) + + BTF_ID(func, bpf_lsm_syslog) + BTF_ID(func, bpf_lsm_task_alloc) +-BTF_ID(func, bpf_lsm_task_getsecid_subj) ++BTF_ID(func, bpf_lsm_current_getsecid_subj) + BTF_ID(func, bpf_lsm_task_getsecid_obj) + BTF_ID(func, bpf_lsm_task_prctl) + BTF_ID(func, bpf_lsm_task_setscheduler) diff --git a/patches.suse/bpf-Fix-ringbuf-memory-type-confusion-when-passing-t.patch b/patches.suse/bpf-Fix-ringbuf-memory-type-confusion-when-passing-t.patch new file mode 100644 index 0000000..18e833e --- /dev/null +++ b/patches.suse/bpf-Fix-ringbuf-memory-type-confusion-when-passing-t.patch @@ -0,0 +1,100 @@ +From: Daniel Borkmann +Date: Thu, 13 Jan 2022 11:11:30 +0000 +Subject: bpf: Fix ringbuf memory type confusion when passing to helpers +Patch-mainline: v5.17-rc1 +Git-commit: a672b2e36a648afb04ad3bda93b6bda947a479a5 +References: jsc#PED-1368 + +The bpf_ringbuf_submit() and bpf_ringbuf_discard() have ARG_PTR_TO_ALLOC_MEM +in their bpf_func_proto definition as their first argument, and thus both expect +the result from a prior bpf_ringbuf_reserve() call which has a return type of +RET_PTR_TO_ALLOC_MEM_OR_NULL. + +While the non-NULL memory from bpf_ringbuf_reserve() can be passed to other +helpers, the two sinks (bpf_ringbuf_submit(), bpf_ringbuf_discard()) right now +only enforce a register type of PTR_TO_MEM. 
+ +This can lead to potential type confusion since it would allow other PTR_TO_MEM +memory to be passed into the two sinks which did not come from bpf_ringbuf_reserve(). + +Add a new MEM_ALLOC composable type attribute for PTR_TO_MEM, and enforce that: + + - bpf_ringbuf_reserve() returns NULL or PTR_TO_MEM | MEM_ALLOC + - bpf_ringbuf_submit() and bpf_ringbuf_discard() only take PTR_TO_MEM | MEM_ALLOC + but not plain PTR_TO_MEM arguments via ARG_PTR_TO_ALLOC_MEM + - however, other helpers might treat PTR_TO_MEM | MEM_ALLOC as plain PTR_TO_MEM + to populate the memory area when they use ARG_PTR_TO_{UNINIT_,}MEM in their + func proto description + +Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") +Reported-by: Alexei Starovoitov +Signed-off-by: Daniel Borkmann +Acked-by: John Fastabend +Acked-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- + include/linux/bpf.h | 9 +++++++-- + kernel/bpf/verifier.c | 6 +++++- + 2 files changed, 12 insertions(+), 3 deletions(-) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -316,7 +316,12 @@ enum bpf_type_flag { + */ + MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), + +- __BPF_TYPE_LAST_FLAG = MEM_RDONLY, ++ /* MEM was "allocated" from a different helper, and cannot be mixed ++ * with regular non-MEM_ALLOC'ed MEM types. ++ */ ++ MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS), ++ ++ __BPF_TYPE_LAST_FLAG = MEM_ALLOC, + }; + + /* Max number of base types. */ +@@ -400,7 +405,7 @@ enum bpf_return_type { + RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET, + RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, + RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, +- RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, ++ RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM, + RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, + + /* This must be the last entry. 
Its purpose is to ensure the enum is
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -570,6 +570,8 @@ static const char *reg_type_str(struct b
+
+ if (type & MEM_RDONLY)
+ strncpy(prefix, "rdonly_", 16);
++ if (type & MEM_ALLOC)
++ strncpy(prefix, "alloc_", 16);
+
+ snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
+ prefix, str[base_type(type)], postfix);
+@@ -5139,6 +5141,7 @@ static const struct bpf_reg_types mem_ty
+ PTR_TO_MAP_KEY,
+ PTR_TO_MAP_VALUE,
+ PTR_TO_MEM,
++ PTR_TO_MEM | MEM_ALLOC,
+ PTR_TO_BUF,
+ },
+ };
+@@ -5156,7 +5159,7 @@ static const struct bpf_reg_types int_pt
+ static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
+ static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
+ static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
+-static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
++static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } };
+ static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
+ static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
+ static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
+@@ -5319,6 +5322,7 @@ static int check_func_arg(struct bpf_ver
+ case PTR_TO_MAP_VALUE:
+ case PTR_TO_MEM:
+ case PTR_TO_MEM | MEM_RDONLY:
++ case PTR_TO_MEM | MEM_ALLOC:
+ case PTR_TO_BUF:
+ case PTR_TO_BUF | MEM_RDONLY:
+ case PTR_TO_STACK:
diff --git a/patches.suse/bpf-Fix-the-test_task_vma-selftest-to-support-output.patch b/patches.suse/bpf-Fix-the-test_task_vma-selftest-to-support-output.patch
new file mode 100644
index 0000000..0a8db0c
--- /dev/null
+++ b/patches.suse/bpf-Fix-the-test_task_vma-selftest-to-support-output.patch
@@ -0,0 +1,67 @@
+From: Maxim Mikityanskiy
+Date: Tue, 30 Nov 2021 20:18:11 +0200
+Subject: bpf: Fix the test_task_vma selftest to support output shorter than 1
+ kB
+Patch-mainline: v5.17-rc1
+Git-commit: da54ab14953c38d98cb3e34c564c06c3739394b2
+References: jsc#PED-1368
+
+The test for bpf_iter_task_vma assumes that the output will be longer
+than 1 kB, as the comment above the loop says. Due to this assumption,
+the loop becomes infinite if the output turns out to be shorter than 1 kB.
+The return value of read_fd_into_buffer is 0 when the end of file is
+reached, and len isn't being increased any more.
+This commit adds a break on EOF to handle short output correctly.
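+
+Condensed, the fixed loop behaves as follows (abridged from the hunk
+below; the CHECK() error handling is shortened for illustration):
+
+	len = 0;
+	while (len < CMP_BUFFER_SIZE) {
+		err = read_fd_into_buffer(iter_fd, task_vma_output + len,
+					  min(read_size, CMP_BUFFER_SIZE - len));
+		if (!err)
+			break;	/* EOF: output was shorter than 1 kB */
+		if (err < 0)
+			goto out;	/* real read error */
+		len += err;
+	}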
For +the reference, this is the contents that I get when running test_progs +under vmtest.sh, and it's shorter than 1 kB: + +00400000-00401000 r--p 00000000 fe:00 25867 /root/bpf/test_progs +00401000-00674000 r-xp 00001000 fe:00 25867 /root/bpf/test_progs +00674000-0095f000 r--p 00274000 fe:00 25867 /root/bpf/test_progs +0095f000-00983000 r--p 0055e000 fe:00 25867 /root/bpf/test_progs +00983000-00a8a000 rw-p 00582000 fe:00 25867 /root/bpf/test_progs +00a8a000-0484e000 rw-p 00000000 00:00 0 +7f6c64000000-7f6c64021000 rw-p 00000000 00:00 0 +7f6c64021000-7f6c68000000 ---p 00000000 00:00 0 +7f6c6ac8f000-7f6c6ac90000 r--s 00000000 00:0d 8032 +anon_inode:bpf-map +7f6c6ac90000-7f6c6ac91000 ---p 00000000 00:00 0 +7f6c6ac91000-7f6c6b491000 rw-p 00000000 00:00 0 +7f6c6b491000-7f6c6b492000 r--s 00000000 00:0d 8032 +anon_inode:bpf-map +7f6c6b492000-7f6c6b493000 rw-s 00000000 00:0d 8032 +anon_inode:bpf-map +7ffc1e23d000-7ffc1e25e000 rw-p 00000000 00:00 0 +7ffc1e3b8000-7ffc1e3bc000 r--p 00000000 00:00 0 +7ffc1e3bc000-7ffc1e3bd000 r-xp 00000000 00:00 0 +7fffffffe000-7ffffffff000 --xp 00000000 00:00 0 + +Fixes: e8168840e16c ("selftests/bpf: Add test for bpf_iter_task_vma") +Signed-off-by: Maxim Mikityanskiy +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211130181811.594220-1-maximmi@nvidia.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c ++++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +@@ -1206,13 +1206,14 @@ static void test_task_vma(void) + goto out; + + /* Read CMP_BUFFER_SIZE (1kB) from bpf_iter. Read in small chunks +- * to trigger seq_file corner cases. The expected output is much +- * longer than 1kB, so the while loop will terminate. ++ * to trigger seq_file corner cases. + */ + len = 0; + while (len < CMP_BUFFER_SIZE) { + err = read_fd_into_buffer(iter_fd, task_vma_output + len, + min(read_size, CMP_BUFFER_SIZE - len)); ++ if (!err) ++ break; + if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n")) + goto out; + len += err; diff --git a/patches.suse/bpf-Fix-typo-in-a-comment-in-bpf-lpm_trie.patch b/patches.suse/bpf-Fix-typo-in-a-comment-in-bpf-lpm_trie.patch new file mode 100644 index 0000000..034d1f7 --- /dev/null +++ b/patches.suse/bpf-Fix-typo-in-a-comment-in-bpf-lpm_trie.patch @@ -0,0 +1,28 @@ +From: Leon Huayra +Date: Wed, 29 Dec 2021 22:44:22 +0800 +Subject: bpf: Fix typo in a comment in bpf lpm_trie. +Patch-mainline: v5.17-rc1 +Git-commit: 9e6b19a66d9b6b94395478fe79c5a3ccba181ad3 +References: jsc#PED-1368 + +Fix typo in a comment in trie_update_elem(). 
+
+Signed-off-by: Leon Huayra
+Signed-off-by: Alexei Starovoitov
+Link: https://lore.kernel.org/bpf/20211229144422.70339-1-hffilwlqm@gmail.com
+Acked-by: Shung-Hsi Yu
+---
+ kernel/bpf/lpm_trie.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/bpf/lpm_trie.c
++++ b/kernel/bpf/lpm_trie.c
+@@ -412,7 +412,7 @@ static int trie_update_elem(struct bpf_m
+ rcu_assign_pointer(im_node->child[1], node);
+ }
+
+- /* Finally, assign the intermediate node to the determined spot */
++ /* Finally, assign the intermediate node to the determined slot */
+ rcu_assign_pointer(*slot, im_node);
+
+ out:
diff --git a/patches.suse/bpf-Fix-verifier-support-for-validation-of-async-cal.patch b/patches.suse/bpf-Fix-verifier-support-for-validation-of-async-cal.patch
new file mode 100644
index 0000000..2b1d199
--- /dev/null
+++ b/patches.suse/bpf-Fix-verifier-support-for-validation-of-async-cal.patch
@@ -0,0 +1,48 @@
+From: Kris Van Hees
+Date: Wed, 5 Jan 2022 16:01:50 -0500
+Subject: bpf: Fix verifier support for validation of async callbacks
+Patch-mainline: v5.17-rc1
+Git-commit: a5bebc4f00dee47113eed48098c68e88b5ba70e8
+References: jsc#PED-1368
+
+Commit bfc6bb74e4f1 ("bpf: Implement verifier support for validation of async callbacks.")
+added support for BPF_FUNC_timer_set_callback to
+the __check_func_call() function. The test in __check_func_call() is
+flawed because it can misinterpret a regular BPF-to-BPF pseudo-call
+as a BPF_FUNC_timer_set_callback callback call.
+
+Consider the conditional in the code:
+
+ if (insn->code == (BPF_JMP | BPF_CALL) &&
+ insn->imm == BPF_FUNC_timer_set_callback) {
+
+BPF_FUNC_timer_set_callback has value 170. This means that if you
+have a BPF program that contains a pseudo-call with an instruction delta
+of 170, this conditional will be found to be true by the verifier, and
+it will interpret the pseudo-call as a callback. This leads to a mess
+with the verification of the program because it makes the wrong
+assumptions about the nature of this call.
+
+Solution: include an explicit check to ensure that insn->src_reg == 0.
+This ensures that calls cannot be misinterpreted as an async callback
+call.
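+
+For reference, a sketch of the two instruction forms the flawed test
+conflated (illustrative initializers, not part of the commit). Both use
+the same opcode and may carry the same imm; only src_reg tells them
+apart:
+
+	/* helper call: src_reg == 0, imm selects the helper */
+	struct bpf_insn helper_call = {
+		.code	 = BPF_JMP | BPF_CALL,
+		.src_reg = 0,
+		.imm	 = BPF_FUNC_timer_set_callback,	/* 170 */
+	};
+
+	/* BPF-to-BPF pseudo-call: src_reg == BPF_PSEUDO_CALL and imm is
+	 * an instruction delta, which can also happen to be 170
+	 */
+	struct bpf_insn pseudo_call = {
+		.code	 = BPF_JMP | BPF_CALL,
+		.src_reg = BPF_PSEUDO_CALL,
+		.imm	 = 170,
+	};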
+ +Fixes: bfc6bb74e4f1 ("bpf: Implement verifier support for validation of async callbacks.") +Signed-off-by: Kris Van Hees +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220105210150.GH1559@oracle.com +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/verifier.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -6035,6 +6035,7 @@ static int __check_func_call(struct bpf_ + } + + if (insn->code == (BPF_JMP | BPF_CALL) && ++ insn->src_reg == 0 && + insn->imm == BPF_FUNC_timer_set_callback) { + struct bpf_verifier_state *async_cb; + diff --git a/patches.suse/bpf-Generalize-check_ctx_reg-for-reuse-with-other-ty.patch b/patches.suse/bpf-Generalize-check_ctx_reg-for-reuse-with-other-ty.patch index 1fbdd21..dcb4833 100644 --- a/patches.suse/bpf-Generalize-check_ctx_reg-for-reuse-with-other-ty.patch +++ b/patches.suse/bpf-Generalize-check_ctx_reg-for-reuse-with-other-ty.patch @@ -22,7 +22,7 @@ Acked-by: Shung-Hsi Yu --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h -@@ -483,8 +483,8 @@ bpf_prog_offload_replace_insn(struct bpf +@@ -519,8 +519,8 @@ bpf_prog_offload_replace_insn(struct bpf void bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); @@ -35,18 +35,18 @@ Acked-by: Shung-Hsi Yu --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c -@@ -5489,7 +5489,7 @@ static int btf_check_func_arg_match(stru +@@ -5686,7 +5686,7 @@ static int btf_check_func_arg_match(stru i, btf_type_str(t)); return -EINVAL; } - if (check_ctx_reg(env, reg, regno)) + if (check_ptr_off_reg(env, reg, regno)) return -EINVAL; - } else if (ptr_to_mem_ok) { - const struct btf_type *resolve_ret; + } else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || reg2btf_ids[reg->type])) { + const struct btf_type *reg_ref_t; --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c -@@ -3672,16 +3672,16 @@ static int get_callee_stack_depth(struct +@@ -3973,16 +3973,16 @@ static int get_callee_stack_depth(struct } #endif @@ -69,7 +69,7 @@ Acked-by: Shung-Hsi Yu return -EACCES; } -@@ -3689,7 +3689,8 @@ int check_ctx_reg(struct bpf_verifier_en +@@ -3990,7 +3990,8 @@ int check_ctx_reg(struct bpf_verifier_en char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); @@ -79,7 +79,7 @@ Acked-by: Shung-Hsi Yu return -EACCES; } -@@ -4125,7 +4126,7 @@ static int check_mem_access(struct bpf_v +@@ -4441,7 +4442,7 @@ static int check_mem_access(struct bpf_v return -EACCES; } @@ -88,7 +88,7 @@ Acked-by: Shung-Hsi Yu if (err < 0) return err; -@@ -4917,7 +4918,7 @@ static int check_func_arg(struct bpf_ver +@@ -5309,7 +5310,7 @@ static int check_func_arg(struct bpf_ver return err; if (type == PTR_TO_CTX) { @@ -97,7 +97,7 @@ Acked-by: Shung-Hsi Yu if (err < 0) return err; } -@@ -9069,7 +9070,7 @@ static int check_ld_abs(struct bpf_verif +@@ -9654,7 +9655,7 @@ static int check_ld_abs(struct bpf_verif return err; } diff --git a/patches.suse/bpf-Guard-against-accessing-NULL-pt_regs-in-bpf_get_.patch b/patches.suse/bpf-Guard-against-accessing-NULL-pt_regs-in-bpf_get_.patch new file mode 100644 index 0000000..05398dc --- /dev/null +++ b/patches.suse/bpf-Guard-against-accessing-NULL-pt_regs-in-bpf_get_.patch @@ -0,0 +1,42 @@ +From: "Naveen N. Rao" +Date: Thu, 6 Jan 2022 17:15:05 +0530 +Subject: bpf: Guard against accessing NULL pt_regs in bpf_get_task_stack() +Patch-mainline: v5.17-rc2 +Git-commit: b992f01e66150fc5e90be4a96f5eb8e634c8249e +References: jsc#PED-1368 + +task_pt_regs() can return NULL on powerpc for kernel threads. 
This is
+then used in __bpf_get_stack() to check for user mode, resulting in a
+kernel oops. Guard against this by checking the return value of
+task_pt_regs() before trying to obtain the call chain.
+
+Fixes: fa28dcb82a38f8 ("bpf: Introduce helper bpf_get_task_stack()")
+Cc: stable@vger.kernel.org # v5.9+
+Signed-off-by: Naveen N. Rao
+Acked-by: Daniel Borkmann
+Signed-off-by: Michael Ellerman
+Link: https://lore.kernel.org/r/d5ef83c361cc255494afd15ff1b4fb02a36e1dcf.1641468127.git.naveen.n.rao@linux.vnet.ibm.com
+Acked-by: Shung-Hsi Yu
+---
+ kernel/bpf/stackmap.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/kernel/bpf/stackmap.c
++++ b/kernel/bpf/stackmap.c
+@@ -472,13 +472,14 @@ BPF_CALL_4(bpf_get_task_stack, struct ta
+ u32, size, u64, flags)
+ {
+ struct pt_regs *regs;
+- long res;
++ long res = -EINVAL;
+
+ if (!try_get_task_stack(task))
+ return -EFAULT;
+
+ regs = task_pt_regs(task);
+- res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
++ if (regs)
++ res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
+ put_task_stack(task);
+
+ return res;
diff --git a/patches.suse/bpf-Introduce-btf_tracing_ids.patch b/patches.suse/bpf-Introduce-btf_tracing_ids.patch
new file mode 100644
index 0000000..fbb84df
--- /dev/null
+++ b/patches.suse/bpf-Introduce-btf_tracing_ids.patch
@@ -0,0 +1,162 @@
+From: Song Liu
+Date: Fri, 12 Nov 2021 07:02:43 -0800
+Subject: bpf: Introduce btf_tracing_ids
+Patch-mainline: v5.17-rc1
+Git-commit: d19ddb476a539fd78ad1028ae13bb38506286931
+References: jsc#PED-1368
+
+Similar to btf_sock_ids, btf_tracing_ids provides btf ID for task_struct,
+file, and vm_area_struct via an easy-to-understand format like
+btf_tracing_ids[BTF_TRACING_TYPE_[TASK|FILE|VMA]].
+
+Suggested-by: Alexei Starovoitov
+Signed-off-by: Song Liu
+Signed-off-by: Alexei Starovoitov
+Acked-by: Yonghong Song
+Link: https://lore.kernel.org/bpf/20211112150243.1270987-3-songliubraving@fb.com
+Acked-by: Shung-Hsi Yu
+---
+ include/linux/btf_ids.h | 14 +++++++++++++-
+ kernel/bpf/bpf_task_storage.c | 4 ++--
+ kernel/bpf/btf.c | 8 ++++----
+ kernel/bpf/stackmap.c | 2 +-
+ kernel/bpf/task_iter.c | 12 ++++++------
+ kernel/bpf/verifier.c | 2 +-
+ kernel/trace/bpf_trace.c | 4 ++--
+ 7 files changed, 29 insertions(+), 17 deletions(-)
+
+--- a/include/linux/btf_ids.h
++++ b/include/linux/btf_ids.h
+@@ -189,6 +189,18 @@ MAX_BTF_SOCK_TYPE,
+ extern u32 btf_sock_ids[];
+ #endif
+
+-extern u32 btf_task_struct_ids[];
++#define BTF_TRACING_TYPE_xxx \
++ BTF_TRACING_TYPE(BTF_TRACING_TYPE_TASK, task_struct) \
++ BTF_TRACING_TYPE(BTF_TRACING_TYPE_FILE, file) \
++ BTF_TRACING_TYPE(BTF_TRACING_TYPE_VMA, vm_area_struct)
++
++enum {
++#define BTF_TRACING_TYPE(name, type) name,
++BTF_TRACING_TYPE_xxx
++#undef BTF_TRACING_TYPE
++MAX_BTF_TRACING_TYPE,
++};
++
++extern u32 btf_tracing_ids[];
+
+ #endif
+--- a/kernel/bpf/bpf_task_storage.c
++++ b/kernel/bpf/bpf_task_storage.c
+@@ -323,7 +323,7 @@ const struct bpf_func_proto bpf_task_sto
+ .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_BTF_ID,
+- .arg2_btf_id = &btf_task_struct_ids[0],
++ .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+ .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg4_type = ARG_ANYTHING,
+ };
+@@ -334,5 +334,5 @@ const struct bpf_func_proto bpf_task_sto
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_BTF_ID,
+- .arg2_btf_id = &btf_task_struct_ids[0],
++ .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+ };
+---
a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -6354,10 +6354,10 @@ const struct bpf_func_proto bpf_btf_find + .arg4_type = ARG_ANYTHING, + }; + +-BTF_ID_LIST_GLOBAL(btf_task_struct_ids, 3) +-BTF_ID(struct, task_struct) +-BTF_ID(struct, file) +-BTF_ID(struct, vm_area_struct) ++BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE) ++#define BTF_TRACING_TYPE(name, type) BTF_ID(struct, type) ++BTF_TRACING_TYPE_xxx ++#undef BTF_TRACING_TYPE + + /* BTF ID set registration API for modules */ + +--- a/kernel/bpf/stackmap.c ++++ b/kernel/bpf/stackmap.c +@@ -489,7 +489,7 @@ const struct bpf_func_proto bpf_get_task + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, +- .arg1_btf_id = &btf_task_struct_ids[0], ++ .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, +--- a/kernel/bpf/task_iter.c ++++ b/kernel/bpf/task_iter.c +@@ -622,7 +622,7 @@ const struct bpf_func_proto bpf_find_vma + .func = bpf_find_vma, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, +- .arg1_btf_id = &btf_task_struct_ids[0], ++ .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_FUNC, + .arg4_type = ARG_PTR_TO_STACK_OR_NULL, +@@ -652,19 +652,19 @@ static int __init task_iter_init(void) + init_irq_work(&work->irq_work, do_mmap_read_unlock); + } + +- task_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0]; ++ task_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK]; + ret = bpf_iter_reg_target(&task_reg_info); + if (ret) + return ret; + +- task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0]; +- task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_struct_ids[1]; ++ task_file_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK]; ++ task_file_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_FILE]; + ret = bpf_iter_reg_target(&task_file_reg_info); + if (ret) + return ret; + +- task_vma_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0]; +- task_vma_reg_info.ctx_arg_info[1].btf_id = btf_task_struct_ids[2]; ++ task_vma_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK]; ++ task_vma_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA]; + return bpf_iter_reg_target(&task_vma_reg_info); + } + late_initcall(task_iter_init); +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -6164,7 +6164,7 @@ static int set_find_vma_callback_state(s + callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID; + __mark_reg_known_zero(&callee->regs[BPF_REG_2]); + callee->regs[BPF_REG_2].btf = btf_vmlinux; +- callee->regs[BPF_REG_2].btf_id = btf_task_struct_ids[2]; ++ callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA], + + /* pointer to stack or null */ + callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4]; +--- a/kernel/trace/bpf_trace.c ++++ b/kernel/trace/bpf_trace.c +@@ -764,7 +764,7 @@ const struct bpf_func_proto bpf_get_curr + .func = bpf_get_current_task_btf, + .gpl_only = true, + .ret_type = RET_PTR_TO_BTF_ID, +- .ret_btf_id = &btf_task_struct_ids[0], ++ .ret_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], + }; + + BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task) +@@ -779,7 +779,7 @@ const struct bpf_func_proto bpf_task_pt_ + .func = bpf_task_pt_regs, + .gpl_only = true, + .arg1_type = ARG_PTR_TO_BTF_ID, +- .arg1_btf_id = &btf_task_struct_ids[0], ++ .arg1_btf_id = 
&btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+ .ret_type = RET_PTR_TO_BTF_ID,
+ .ret_btf_id = &bpf_task_pt_regs_ids[0],
+ };
diff --git a/patches.suse/bpf-Introduce-helper-bpf_find_vma.patch b/patches.suse/bpf-Introduce-helper-bpf_find_vma.patch
new file mode 100644
index 0000000..62b898b
--- /dev/null
+++ b/patches.suse/bpf-Introduce-helper-bpf_find_vma.patch
@@ -0,0 +1,496 @@
+From: Song Liu
+Date: Fri, 5 Nov 2021 16:23:29 -0700
+Subject: bpf: Introduce helper bpf_find_vma
+Patch-mainline: v5.17-rc1
+Git-commit: 7c7e3d31e7856a8260a254f8c71db416f7f9f5a1
+References: jsc#PED-1368
+
+In some profiler use cases, it is necessary to map an address to the
+backing file, e.g., a shared library. The bpf_find_vma helper provides a
+flexible way to achieve this. bpf_find_vma maps an address of a task to
+the vma (vm_area_struct) for this address, and feeds the vma to a callback
+BPF function. The callback function is necessary here, as we need to
+ensure mmap_sem is unlocked.
+
+It is necessary to lock mmap_sem for find_vma. To lock and unlock mmap_sem
+safely when irqs are disabled, we use the same mechanism as stackmap with
+build_id. Specifically, when irqs are disabled, the unlock is postponed
+in an irq_work. Refactor stackmap.c so that the irq_work is shared among
+bpf_find_vma and stackmap helpers.
+
+Signed-off-by: Song Liu
+Signed-off-by: Alexei Starovoitov
+Tested-by: Hengqi Chen
+Acked-by: Yonghong Song
+Link: https://lore.kernel.org/bpf/20211105232330.1936330-2-songliubraving@fb.com
+Acked-by: Shung-Hsi Yu
+---
+ include/linux/bpf.h | 1
+ include/uapi/linux/bpf.h | 20 ++++++++++
+ kernel/bpf/btf.c | 5 ++
+ kernel/bpf/mmap_unlock_work.h | 65 +++++++++++++++++++++++++++++++++
+ kernel/bpf/stackmap.c | 80 +++++--------------------------------------
+ kernel/bpf/task_iter.c | 76 +++++++++++++++++++++++++++++++++++---
+ kernel/bpf/verifier.c | 34 +++++++++++++++++
+ kernel/trace/bpf_trace.c | 2 +
+ tools/include/uapi/linux/bpf.h | 20 ++++++++++
+ 9 files changed, 222 insertions(+), 81 deletions(-)
+ create mode 100644 kernel/bpf/mmap_unlock_work.h
+
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -2154,6 +2154,7 @@ extern const struct bpf_func_proto bpf_b
+ extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
+ extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
+ extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
++extern const struct bpf_func_proto bpf_find_vma_proto;
+
+ const struct bpf_func_proto *tracing_prog_func_proto(
+ enum bpf_func_id func_id, const struct bpf_prog *prog);
+--- a/include/uapi/linux/bpf.h
++++ b/include/uapi/linux/bpf.h
+@@ -4938,6 +4938,25 @@ union bpf_attr {
+ * **-ENOENT** if symbol is not found.
+ *
+ * **-EPERM** if caller does not have permission to obtain kernel address.
++ *
++ * long bpf_find_vma(struct task_struct *task, u64 addr, void *callback_fn, void *callback_ctx, u64 flags)
++ * Description
++ * Find vma of *task* that contains *addr*, call *callback_fn*
++ * function with *task*, *vma*, and *callback_ctx*.
++ * The *callback_fn* should be a static function and
++ * the *callback_ctx* should be a pointer to the stack.
++ * The *flags* is used to control certain aspects of the helper.
++ * Currently, the *flags* must be 0.
++ *
++ * The expected callback signature is
++ *
++ * long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx);
++ *
++ * Return
++ * 0 on success.
++ * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*.
++ * **-EBUSY** if failed to try lock mmap_lock. ++ * **-EINVAL** for invalid **flags**. + */ + #define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ +@@ -5120,6 +5139,7 @@ union bpf_attr { + FN(trace_vprintk), \ + FN(skc_to_unix_sock), \ + FN(kallsyms_lookup_name), \ ++ FN(find_vma), \ + /* */ + + /* integer value in 'imm' field of BPF_CALL instruction selects which helper +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -6342,7 +6342,10 @@ const struct bpf_func_proto bpf_btf_find + .arg4_type = ARG_ANYTHING, + }; + +-BTF_ID_LIST_GLOBAL_SINGLE(btf_task_struct_ids, struct, task_struct) ++BTF_ID_LIST_GLOBAL(btf_task_struct_ids) ++BTF_ID(struct, task_struct) ++BTF_ID(struct, file) ++BTF_ID(struct, vm_area_struct) + + /* BTF ID set registration API for modules */ + +--- /dev/null ++++ b/kernel/bpf/mmap_unlock_work.h +@@ -0,0 +1,65 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* Copyright (c) 2021 Facebook ++ */ ++ ++#ifndef __MMAP_UNLOCK_WORK_H__ ++#define __MMAP_UNLOCK_WORK_H__ ++#include ++ ++/* irq_work to run mmap_read_unlock() in irq_work */ ++struct mmap_unlock_irq_work { ++ struct irq_work irq_work; ++ struct mm_struct *mm; ++}; ++ ++DECLARE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work); ++ ++/* ++ * We cannot do mmap_read_unlock() when the irq is disabled, because of ++ * risk to deadlock with rq_lock. To look up vma when the irqs are ++ * disabled, we need to run mmap_read_unlock() in irq_work. We use a ++ * percpu variable to do the irq_work. If the irq_work is already used ++ * by another lookup, we fall over. ++ */ ++static inline bool bpf_mmap_unlock_get_irq_work(struct mmap_unlock_irq_work **work_ptr) ++{ ++ struct mmap_unlock_irq_work *work = NULL; ++ bool irq_work_busy = false; ++ ++ if (irqs_disabled()) { ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { ++ work = this_cpu_ptr(&mmap_unlock_work); ++ if (irq_work_is_busy(&work->irq_work)) { ++ /* cannot queue more up_read, fallback */ ++ irq_work_busy = true; ++ } ++ } else { ++ /* ++ * PREEMPT_RT does not allow to trylock mmap sem in ++ * interrupt disabled context. Force the fallback code. ++ */ ++ irq_work_busy = true; ++ } ++ } ++ ++ *work_ptr = work; ++ return irq_work_busy; ++} ++ ++static inline void bpf_mmap_unlock_mm(struct mmap_unlock_irq_work *work, struct mm_struct *mm) ++{ ++ if (!work) { ++ mmap_read_unlock(mm); ++ } else { ++ work->mm = mm; ++ ++ /* The lock will be released once we're out of interrupt ++ * context. Tell lockdep that we've released it now so ++ * it doesn't complain that we forgot to release it. 
++ */ ++ rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_); ++ irq_work_queue(&work->irq_work); ++ } ++} ++ ++#endif /* __MMAP_UNLOCK_WORK_H__ */ +--- a/kernel/bpf/stackmap.c ++++ b/kernel/bpf/stackmap.c +@@ -7,10 +7,10 @@ + #include + #include + #include +-#include + #include + #include + #include "percpu_freelist.h" ++#include "mmap_unlock_work.h" + + #define STACK_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | \ +@@ -31,25 +31,6 @@ struct bpf_stack_map { + struct stack_map_bucket *buckets[]; + }; + +-/* irq_work to run up_read() for build_id lookup in nmi context */ +-struct stack_map_irq_work { +- struct irq_work irq_work; +- struct mm_struct *mm; +-}; +- +-static void do_up_read(struct irq_work *entry) +-{ +- struct stack_map_irq_work *work; +- +- if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT))) +- return; +- +- work = container_of(entry, struct stack_map_irq_work, irq_work); +- mmap_read_unlock_non_owner(work->mm); +-} +- +-static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work); +- + static inline bool stack_map_use_build_id(struct bpf_map *map) + { + return (map->map_flags & BPF_F_STACK_BUILD_ID); +@@ -149,35 +130,13 @@ static void stack_map_get_build_id_offse + u64 *ips, u32 trace_nr, bool user) + { + int i; ++ struct mmap_unlock_irq_work *work = NULL; ++ bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work); + struct vm_area_struct *vma; +- bool irq_work_busy = false; +- struct stack_map_irq_work *work = NULL; +- +- if (irqs_disabled()) { +- if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { +- work = this_cpu_ptr(&up_read_work); +- if (irq_work_is_busy(&work->irq_work)) { +- /* cannot queue more up_read, fallback */ +- irq_work_busy = true; +- } +- } else { +- /* +- * PREEMPT_RT does not allow to trylock mmap sem in +- * interrupt disabled context. Force the fallback code. +- */ +- irq_work_busy = true; +- } +- } + +- /* +- * We cannot do up_read() when the irq is disabled, because of +- * risk to deadlock with rq_lock. To do build_id lookup when the +- * irqs are disabled, we need to run up_read() in irq_work. We use +- * a percpu variable to do the irq_work. If the irq_work is +- * already used by another lookup, we fall back to report ips. +- * +- * Same fallback is used for kernel stack (!user) on a stackmap +- * with build_id. ++ /* If the irq_work is in use, fall back to report ips. Same ++ * fallback is used for kernel stack (!user) on a stackmap with ++ * build_id. + */ + if (!user || !current || !current->mm || irq_work_busy || + !mmap_read_trylock(current->mm)) { +@@ -203,19 +162,7 @@ static void stack_map_get_build_id_offse + - vma->vm_start; + id_offs[i].status = BPF_STACK_BUILD_ID_VALID; + } +- +- if (!work) { +- mmap_read_unlock(current->mm); +- } else { +- work->mm = current->mm; +- +- /* The lock will be released once we're out of interrupt +- * context. Tell lockdep that we've released it now so +- * it doesn't complain that we forgot to release it. 
+- */ +- rwsem_release(¤t->mm->mmap_lock.dep_map, _RET_IP_); +- irq_work_queue(&work->irq_work); +- } ++ bpf_mmap_unlock_mm(work, current->mm); + } + + static struct perf_callchain_entry * +@@ -719,16 +666,3 @@ const struct bpf_map_ops stack_trace_map + .map_btf_name = "bpf_stack_map", + .map_btf_id = &stack_trace_map_btf_id, + }; +- +-static int __init stack_map_init(void) +-{ +- int cpu; +- struct stack_map_irq_work *work; +- +- for_each_possible_cpu(cpu) { +- work = per_cpu_ptr(&up_read_work, cpu); +- init_irq_work(&work->irq_work, do_up_read); +- } +- return 0; +-} +-subsys_initcall(stack_map_init); +--- a/kernel/bpf/task_iter.c ++++ b/kernel/bpf/task_iter.c +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include "mmap_unlock_work.h" + + struct bpf_iter_seq_task_common { + struct pid_namespace *ns; +@@ -524,10 +525,6 @@ static const struct seq_operations task_ + .show = task_vma_seq_show, + }; + +-BTF_ID_LIST(btf_task_file_ids) +-BTF_ID(struct, file) +-BTF_ID(struct, vm_area_struct) +- + static const struct bpf_iter_seq_info task_seq_info = { + .seq_ops = &task_seq_ops, + .init_seq_private = init_seq_pidns, +@@ -586,9 +583,74 @@ static struct bpf_iter_reg task_vma_reg_ + .seq_info = &task_vma_seq_info, + }; + ++BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start, ++ bpf_callback_t, callback_fn, void *, callback_ctx, u64, flags) ++{ ++ struct mmap_unlock_irq_work *work = NULL; ++ struct vm_area_struct *vma; ++ bool irq_work_busy = false; ++ struct mm_struct *mm; ++ int ret = -ENOENT; ++ ++ if (flags) ++ return -EINVAL; ++ ++ if (!task) ++ return -ENOENT; ++ ++ mm = task->mm; ++ if (!mm) ++ return -ENOENT; ++ ++ irq_work_busy = bpf_mmap_unlock_get_irq_work(&work); ++ ++ if (irq_work_busy || !mmap_read_trylock(mm)) ++ return -EBUSY; ++ ++ vma = find_vma(mm, start); ++ ++ if (vma && vma->vm_start <= start && vma->vm_end > start) { ++ callback_fn((u64)(long)task, (u64)(long)vma, ++ (u64)(long)callback_ctx, 0, 0); ++ ret = 0; ++ } ++ bpf_mmap_unlock_mm(work, mm); ++ return ret; ++} ++ ++const struct bpf_func_proto bpf_find_vma_proto = { ++ .func = bpf_find_vma, ++ .ret_type = RET_INTEGER, ++ .arg1_type = ARG_PTR_TO_BTF_ID, ++ .arg1_btf_id = &btf_task_struct_ids[0], ++ .arg2_type = ARG_ANYTHING, ++ .arg3_type = ARG_PTR_TO_FUNC, ++ .arg4_type = ARG_PTR_TO_STACK_OR_NULL, ++ .arg5_type = ARG_ANYTHING, ++}; ++ ++DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work); ++ ++static void do_mmap_read_unlock(struct irq_work *entry) ++{ ++ struct mmap_unlock_irq_work *work; ++ ++ if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT))) ++ return; ++ ++ work = container_of(entry, struct mmap_unlock_irq_work, irq_work); ++ mmap_read_unlock_non_owner(work->mm); ++} ++ + static int __init task_iter_init(void) + { +- int ret; ++ struct mmap_unlock_irq_work *work; ++ int ret, cpu; ++ ++ for_each_possible_cpu(cpu) { ++ work = per_cpu_ptr(&mmap_unlock_work, cpu); ++ init_irq_work(&work->irq_work, do_mmap_read_unlock); ++ } + + task_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0]; + ret = bpf_iter_reg_target(&task_reg_info); +@@ -596,13 +658,13 @@ static int __init task_iter_init(void) + return ret; + + task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0]; +- task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[0]; ++ task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_struct_ids[1]; + ret = bpf_iter_reg_target(&task_file_reg_info); + if (ret) + return ret; + + task_vma_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0]; +- 
task_vma_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[1]; ++ task_vma_reg_info.ctx_arg_info[1].btf_id = btf_task_struct_ids[2]; + return bpf_iter_reg_target(&task_vma_reg_info); + } + late_initcall(task_iter_init); +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -6149,6 +6149,33 @@ static int set_timer_callback_state(stru + return 0; + } + ++static int set_find_vma_callback_state(struct bpf_verifier_env *env, ++ struct bpf_func_state *caller, ++ struct bpf_func_state *callee, ++ int insn_idx) ++{ ++ /* bpf_find_vma(struct task_struct *task, u64 addr, ++ * void *callback_fn, void *callback_ctx, u64 flags) ++ * (callback_fn)(struct task_struct *task, ++ * struct vm_area_struct *vma, void *callback_ctx); ++ */ ++ callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1]; ++ ++ callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID; ++ __mark_reg_known_zero(&callee->regs[BPF_REG_2]); ++ callee->regs[BPF_REG_2].btf = btf_vmlinux; ++ callee->regs[BPF_REG_2].btf_id = btf_task_struct_ids[2]; ++ ++ /* pointer to stack or null */ ++ callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4]; ++ ++ /* unused */ ++ __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); ++ __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); ++ callee->in_callback_fn = true; ++ return 0; ++} ++ + static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) + { + struct bpf_verifier_state *state = env->cur_state; +@@ -6505,6 +6532,13 @@ static int check_helper_call(struct bpf_ + if (err < 0) + return -EINVAL; + } ++ ++ if (func_id == BPF_FUNC_find_vma) { ++ err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, ++ set_find_vma_callback_state); ++ if (err < 0) ++ return -EINVAL; ++ } + + if (func_id == BPF_FUNC_snprintf) { + err = check_bpf_snprintf_call(env, regs); +--- a/kernel/trace/bpf_trace.c ++++ b/kernel/trace/bpf_trace.c +@@ -1206,6 +1206,8 @@ bpf_tracing_func_proto(enum bpf_func_id + return &bpf_get_func_ip_proto_tracing; + case BPF_FUNC_get_branch_snapshot: + return &bpf_get_branch_snapshot_proto; ++ case BPF_FUNC_find_vma: ++ return &bpf_find_vma_proto; + case BPF_FUNC_trace_vprintk: + return bpf_get_trace_vprintk_proto(); + default: +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -4941,6 +4941,25 @@ union bpf_attr { + * **-ENOENT** if symbol is not found. + * + * **-EPERM** if caller does not have permission to obtain kernel address. ++ * ++ * long bpf_find_vma(struct task_struct *task, u64 addr, void *callback_fn, void *callback_ctx, u64 flags) ++ * Description ++ * Find vma of *task* that contains *addr*, call *callback_fn* ++ * function with *task*, *vma*, and *callback_ctx*. ++ * The *callback_fn* should be a static function and ++ * the *callback_ctx* should be a pointer to the stack. ++ * The *flags* is used to control certain aspects of the helper. ++ * Currently, the *flags* must be 0. ++ * ++ * The expected callback signature is ++ * ++ * long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx); ++ * ++ * Return ++ * 0 on success. ++ * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. ++ * **-EBUSY** if failed to try lock mmap_lock. ++ * **-EINVAL** for invalid **flags**. 
+ */ + #define __BPF_FUNC_MAPPER(FN) \ + FN(unspec), \ +@@ -5123,6 +5142,7 @@ union bpf_attr { + FN(trace_vprintk), \ + FN(skc_to_unix_sock), \ + FN(kallsyms_lookup_name), \ ++ FN(find_vma), \ + /* */ + + /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/patches.suse/bpf-Invert-the-dependency-between-bpf-netns.h-and-ne.patch b/patches.suse/bpf-Invert-the-dependency-between-bpf-netns.h-and-ne.patch new file mode 100644 index 0000000..b8f7644 --- /dev/null +++ b/patches.suse/bpf-Invert-the-dependency-between-bpf-netns.h-and-ne.patch @@ -0,0 +1,70 @@ +From: Jakub Kicinski +Date: Wed, 29 Dec 2021 17:27:42 -0800 +Subject: bpf: Invert the dependency between bpf-netns.h and netns/bpf.h +Patch-mainline: v5.17-rc1 +Git-commit: aebb51ec3db2a871d74b4afad3f9914812acf120 +References: jsc#PED-1368 + +netns/bpf.h gets included by netdevice.h (thru net_namespace.h) +which in turn gets included in a lot of places. We should keep +netns/bpf.h as light-weight as possible. + +bpf-netns.h seems to contain more implementation details than +deserves to be included in a netns header. It needs to pull in +uapi/bpf.h to get various enum types. + +Move enum netns_bpf_attach_type to netns/bpf.h and invert the +dependency. This makes netns/bpf.h fit the mold of a struct +definition header more clearly, and drops the number of objects +rebuilt when uapi/bpf.h is touched from 7.7k to 1.1k. + +Signed-off-by: Jakub Kicinski +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211230012742.770642-3-kuba@kernel.org +Acked-by: Shung-Hsi Yu +--- + include/linux/bpf-netns.h | 8 +------- + include/net/netns/bpf.h | 9 ++++++++- + 2 files changed, 9 insertions(+), 8 deletions(-) + +--- a/include/linux/bpf-netns.h ++++ b/include/linux/bpf-netns.h +@@ -3,15 +3,9 @@ + #define _BPF_NETNS_H + + #include ++#include + #include + +-enum netns_bpf_attach_type { +- NETNS_BPF_INVALID = -1, +- NETNS_BPF_FLOW_DISSECTOR = 0, +- NETNS_BPF_SK_LOOKUP, +- MAX_NETNS_BPF_ATTACH_TYPE +-}; +- + static inline enum netns_bpf_attach_type + to_netns_bpf_attach_type(enum bpf_attach_type attach_type) + { +--- a/include/net/netns/bpf.h ++++ b/include/net/netns/bpf.h +@@ -6,11 +6,18 @@ + #ifndef __NETNS_BPF_H__ + #define __NETNS_BPF_H__ + +-#include ++#include + + struct bpf_prog; + struct bpf_prog_array; + ++enum netns_bpf_attach_type { ++ NETNS_BPF_INVALID = -1, ++ NETNS_BPF_FLOW_DISSECTOR = 0, ++ NETNS_BPF_SK_LOOKUP, ++ MAX_NETNS_BPF_ATTACH_TYPE ++}; ++ + struct netns_bpf { + /* Array of programs to run compiled from progs or links */ + struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE]; diff --git a/patches.suse/bpf-Only-print-scratched-registers-and-stack-slots-t.patch b/patches.suse/bpf-Only-print-scratched-registers-and-stack-slots-t.patch new file mode 100644 index 0000000..e8f2373 --- /dev/null +++ b/patches.suse/bpf-Only-print-scratched-registers-and-stack-slots-t.patch @@ -0,0 +1,426 @@ +From: Christy Lee +Date: Thu, 16 Dec 2021 13:33:56 -0800 +Subject: bpf: Only print scratched registers and stack slots to verifier logs. +Patch-mainline: v5.17-rc1 +Git-commit: 0f55f9ed21f96630c6ec96805d42f92c0b458b37 +References: jsc#PED-1368 + +When printing verifier state for any log level, print full verifier +state only on function calls or on errors. Otherwise, only print the +registers and stack slots that were accessed. 
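+
+A sketch of the bookkeeping this relies on (illustrative): the verifier
+env gains a 32-bit mask covering the eleven BPF registers and a 64-bit
+mask covering the up to 64 stack slots of a 512-byte frame, set on
+access and cleared after every print (note the 64-bit mask needs a 1ULL
+shift, the subject of the separate literal fix above):
+
+	env->scratched_regs	   |= 1U << regno;	/* register touched */
+	env->scratched_stack_slots |= 1ULL << spi;	/* stack slot touched */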
+ +Log size differences: + +verif_scale_loop6 before: 234566564 +verif_scale_loop6 after: 72143943 +69% size reduction + +kfree_skb before: 166406 +kfree_skb after: 55386 +69% size reduction + +Before: + +156: (61) r0 = *(u32 *)(r1 +0) +157: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1=ctx(id=0,off=0,imm=0) R2_w=invP0 R10=fp0 fp-8_w=00000000 fp-16_w=00\ +000000 fp-24_w=00000000 fp-32_w=00000000 fp-40_w=00000000 fp-48_w=00000000 fp-56_w=00000000 fp-64_w=00000000 fp-72_w=00000000 fp-80_w=00000\ +000 fp-88_w=00000000 fp-96_w=00000000 fp-104_w=00000000 fp-112_w=00000000 fp-120_w=00000000 fp-128_w=00000000 fp-136_w=00000000 fp-144_w=00\ +000000 fp-152_w=00000000 fp-160_w=00000000 fp-168_w=00000000 fp-176_w=00000000 fp-184_w=00000000 fp-192_w=00000000 fp-200_w=00000000 fp-208\ +_w=00000000 fp-216_w=00000000 fp-224_w=00000000 fp-232_w=00000000 fp-240_w=00000000 fp-248_w=00000000 fp-256_w=00000000 fp-264_w=00000000 f\ +p-272_w=00000000 fp-280_w=00000000 fp-288_w=00000000 fp-296_w=00000000 fp-304_w=00000000 fp-312_w=00000000 fp-320_w=00000000 fp-328_w=00000\ +000 fp-336_w=00000000 fp-344_w=00000000 fp-352_w=00000000 fp-360_w=00000000 fp-368_w=00000000 fp-376_w=00000000 fp-384_w=00000000 fp-392_w=\ +00000000 fp-400_w=00000000 fp-408_w=00000000 fp-416_w=00000000 fp-424_w=00000000 fp-432_w=00000000 fp-440_w=00000000 fp-448_w=00000000 +; return skb->len; +157: (95) exit +Func#4 is safe for any args that match its prototype +Validating get_constant() func#5... +158: R1=invP(id=0) R10=fp0 +; int get_constant(long val) +158: (bf) r0 = r1 +159: R0_w=invP(id=1) R1=invP(id=1) R10=fp0 +; return val - 122; +159: (04) w0 += -122 +160: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1=invP(id=1) R10=fp0 +; return val - 122; +160: (95) exit +Func#5 is safe for any args that match its prototype +Validating get_skb_ifindex() func#6... +161: R1=invP(id=0) R2=ctx(id=0,off=0,imm=0) R3=invP(id=0) R10=fp0 +; int get_skb_ifindex(int val, struct __sk_buff *skb, int var) +161: (bc) w0 = w3 +162: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1=invP(id=0) R2=ctx(id=0,off=0,imm=0) R3=invP(id=0) R10=fp0 + +After: + +156: (61) r0 = *(u32 *)(r1 +0) +157: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1=ctx(id=0,off=0,imm=0) +; return skb->len; +157: (95) exit +Func#4 is safe for any args that match its prototype +Validating get_constant() func#5... +158: R1=invP(id=0) R10=fp0 +; int get_constant(long val) +158: (bf) r0 = r1 +159: R0_w=invP(id=1) R1=invP(id=1) +; return val - 122; +159: (04) w0 += -122 +160: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) +; return val - 122; +160: (95) exit +Func#5 is safe for any args that match its prototype +Validating get_skb_ifindex() func#6... 
+161: R1=invP(id=0) R2=ctx(id=0,off=0,imm=0) R3=invP(id=0) R10=fp0 +; int get_skb_ifindex(int val, struct __sk_buff *skb, int var) +161: (bc) w0 = w3 +162: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R3=invP(id=0) + +Signed-off-by: Christy Lee +Signed-off-by: Alexei Starovoitov +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211216213358.3374427-2-christylee@fb.com +Acked-by: Shung-Hsi Yu +--- + include/linux/bpf_verifier.h | 7 ++ + kernel/bpf/verifier.c | 83 ++++++++++++++++++++----- + tools/testing/selftests/bpf/prog_tests/align.c | 30 ++++----- + 3 files changed, 91 insertions(+), 29 deletions(-) + +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -474,6 +474,13 @@ struct bpf_verifier_env { + /* longest register parentage chain walked for liveness marking */ + u32 longest_mark_read_walk; + bpfptr_t fd_array; ++ ++ /* bit mask to keep track of whether a register has been accessed ++ * since the last time the function state was printed ++ */ ++ u32 scratched_regs; ++ /* Same as scratched_regs but for stack slots */ ++ u64 scratched_stack_slots; + }; + + __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -609,6 +609,44 @@ static const char *kernel_type_name(cons + return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); + } + ++static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) ++{ ++ env->scratched_regs |= 1U << regno; ++} ++ ++static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi) ++{ ++ env->scratched_stack_slots |= 1UL << spi; ++} ++ ++static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno) ++{ ++ return (env->scratched_regs >> regno) & 1; ++} ++ ++static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno) ++{ ++ return (env->scratched_stack_slots >> regno) & 1; ++} ++ ++static bool verifier_state_scratched(const struct bpf_verifier_env *env) ++{ ++ return env->scratched_regs || env->scratched_stack_slots; ++} ++ ++static void mark_verifier_state_clean(struct bpf_verifier_env *env) ++{ ++ env->scratched_regs = 0U; ++ env->scratched_stack_slots = 0UL; ++} ++ ++/* Used for printing the entire verifier state. */ ++static void mark_verifier_state_scratched(struct bpf_verifier_env *env) ++{ ++ env->scratched_regs = ~0U; ++ env->scratched_stack_slots = ~0UL; ++} ++ + /* The reg state of a pointer or a bounded scalar was saved when + * it was spilled to the stack. 
+ */ +@@ -624,7 +662,8 @@ static void scrub_spilled_slot(u8 *stype + } + + static void print_verifier_state(struct bpf_verifier_env *env, +- const struct bpf_func_state *state) ++ const struct bpf_func_state *state, ++ bool print_all) + { + const struct bpf_reg_state *reg; + enum bpf_reg_type t; +@@ -637,6 +676,8 @@ static void print_verifier_state(struct + t = reg->type; + if (t == NOT_INIT) + continue; ++ if (!print_all && !reg_scratched(env, i)) ++ continue; + verbose(env, " R%d", i); + print_liveness(env, reg->live); + verbose(env, "=%s", reg_type_str[t]); +@@ -726,6 +767,8 @@ static void print_verifier_state(struct + types_buf[BPF_REG_SIZE] = 0; + if (!valid) + continue; ++ if (!print_all && !stack_slot_scratched(env, i)) ++ continue; + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, state->stack[i].spilled_ptr.live); + if (is_spilled_reg(&state->stack[i])) { +@@ -751,6 +794,7 @@ static void print_verifier_state(struct + if (state->in_async_callback_fn) + verbose(env, " async_cb"); + verbose(env, "\n"); ++ mark_verifier_state_clean(env); + } + + /* copy array src of length n * size bytes to dst. dst is reallocated if it's too +@@ -1547,6 +1591,7 @@ static void init_func_state(struct bpf_v + state->frameno = frameno; + state->subprogno = subprogno; + init_reg_state(env, state); ++ mark_verifier_state_scratched(env); + } + + /* Similar to push_stack(), but for async callbacks */ +@@ -2234,6 +2279,8 @@ static int check_reg_arg(struct bpf_veri + return -EINVAL; + } + ++ mark_reg_scratched(env, regno); ++ + reg = ®s[regno]; + rw64 = is_reg64(env, insn, regno, reg, t); + if (t == SRC_OP) { +@@ -2684,7 +2731,7 @@ static int __mark_chain_precision(struct + reg->precise = true; + } + if (env->log.level & BPF_LOG_LEVEL) { +- print_verifier_state(env, func); ++ print_verifier_state(env, func, false); + verbose(env, "parent %s regs=%x stack=%llx marks\n", + new_marks ? "didn't have" : "already had", + reg_mask, stack_mask); +@@ -2843,6 +2890,7 @@ static int check_stack_write_fixed_off(s + env->insn_aux_data[insn_idx].sanitize_stack_spill = true; + } + ++ mark_stack_slot_scratched(env, spi); + if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && + !register_is_null(reg) && env->bpf_capable) { + if (dst_reg != BPF_REG_FP) { +@@ -2964,6 +3012,7 @@ static int check_stack_write_var_off(str + slot = -i - 1; + spi = slot / BPF_REG_SIZE; + stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; ++ mark_stack_slot_scratched(env, spi); + + if (!env->allow_ptr_leaks + && *stype != NOT_INIT +@@ -3382,7 +3431,7 @@ static int check_mem_region_access(struc + * to make sure our theoretical access will be safe. 
+ */ + if (env->log.level & BPF_LOG_LEVEL) +- print_verifier_state(env, state); ++ print_verifier_state(env, state, false); + + /* The minimum value is only important with signed + * comparisons where we can't assume the floor of a +@@ -6030,9 +6079,9 @@ static int __check_func_call(struct bpf_ + + if (env->log.level & BPF_LOG_LEVEL) { + verbose(env, "caller:\n"); +- print_verifier_state(env, caller); ++ print_verifier_state(env, caller, true); + verbose(env, "callee:\n"); +- print_verifier_state(env, callee); ++ print_verifier_state(env, callee, true); + } + return 0; + } +@@ -6247,9 +6296,9 @@ static int prepare_func_exit(struct bpf_ + *insn_idx = callee->callsite + 1; + if (env->log.level & BPF_LOG_LEVEL) { + verbose(env, "returning from callee:\n"); +- print_verifier_state(env, callee); ++ print_verifier_state(env, callee, true); + verbose(env, "to caller at %d:\n", *insn_idx); +- print_verifier_state(env, caller); ++ print_verifier_state(env, caller, true); + } + /* clear everything in the callee */ + free_func_state(callee); +@@ -8268,12 +8317,12 @@ static int adjust_reg_min_max_vals(struc + + /* Got here implies adding two SCALAR_VALUEs */ + if (WARN_ON_ONCE(ptr_reg)) { +- print_verifier_state(env, state); ++ print_verifier_state(env, state, true); + verbose(env, "verifier internal error: unexpected ptr_reg\n"); + return -EINVAL; + } + if (WARN_ON(!src_reg)) { +- print_verifier_state(env, state); ++ print_verifier_state(env, state, true); + verbose(env, "verifier internal error: no src_reg\n"); + return -EINVAL; + } +@@ -9412,7 +9461,7 @@ static int check_cond_jmp_op(struct bpf_ + return -EACCES; + } + if (env->log.level & BPF_LOG_LEVEL) +- print_verifier_state(env, this_branch->frame[this_branch->curframe]); ++ print_verifier_state(env, this_branch->frame[this_branch->curframe], false); + return 0; + } + +@@ -11282,14 +11331,17 @@ static int do_check(struct bpf_verifier_ + + if (env->log.level & BPF_LOG_LEVEL2 || + (env->log.level & BPF_LOG_LEVEL && do_print_state)) { +- if (env->log.level & BPF_LOG_LEVEL2) +- verbose(env, "%d:", env->insn_idx); +- else ++ if (env->log.level & BPF_LOG_LEVEL2) { ++ if (verifier_state_scratched(env)) ++ verbose(env, "%d:", env->insn_idx); ++ } else { + verbose(env, "\nfrom %d to %d%s:", + env->prev_insn_idx, env->insn_idx, + env->cur_state->speculative ? + " (speculative execution)" : ""); +- print_verifier_state(env, state->frame[state->curframe]); ++ } ++ print_verifier_state(env, state->frame[state->curframe], ++ false); + do_print_state = false; + } + +@@ -11511,6 +11563,7 @@ static int do_check(struct bpf_verifier_ + if (err) + return err; + process_bpf_exit: ++ mark_verifier_state_scratched(env); + update_branch_counts(env, env->cur_state); + err = pop_stack(env, &prev_insn_idx, + &env->insn_idx, pop_log); +@@ -14171,6 +14224,8 @@ int bpf_check(struct bpf_prog **prog, un + } + } + ++ mark_verifier_state_clean(env); ++ + if (IS_ERR(btf_vmlinux)) { + /* Either gcc or pahole or kernel are broken. 
*/ + verbose(env, "in-kernel BTF is malformed\n"); +--- a/tools/testing/selftests/bpf/prog_tests/align.c ++++ b/tools/testing/selftests/bpf/prog_tests/align.c +@@ -39,8 +39,8 @@ static struct bpf_align_test tests[] = { + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { +- {1, "R1=ctx(id=0,off=0,imm=0)"}, +- {1, "R10=fp0"}, ++ {0, "R1=ctx(id=0,off=0,imm=0)"}, ++ {0, "R10=fp0"}, + {1, "R3_w=inv2"}, + {2, "R3_w=inv4"}, + {3, "R3_w=inv8"}, +@@ -67,8 +67,8 @@ static struct bpf_align_test tests[] = { + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { +- {1, "R1=ctx(id=0,off=0,imm=0)"}, +- {1, "R10=fp0"}, ++ {0, "R1=ctx(id=0,off=0,imm=0)"}, ++ {0, "R10=fp0"}, + {1, "R3_w=inv1"}, + {2, "R3_w=inv2"}, + {3, "R3_w=inv4"}, +@@ -96,8 +96,8 @@ static struct bpf_align_test tests[] = { + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { +- {1, "R1=ctx(id=0,off=0,imm=0)"}, +- {1, "R10=fp0"}, ++ {0, "R1=ctx(id=0,off=0,imm=0)"}, ++ {0, "R10=fp0"}, + {1, "R3_w=inv4"}, + {2, "R3_w=inv8"}, + {3, "R3_w=inv10"}, +@@ -118,8 +118,8 @@ static struct bpf_align_test tests[] = { + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { +- {1, "R1=ctx(id=0,off=0,imm=0)"}, +- {1, "R10=fp0"}, ++ {0, "R1=ctx(id=0,off=0,imm=0)"}, ++ {0, "R10=fp0"}, + {1, "R3_w=inv7"}, + {2, "R3_w=inv7"}, + {3, "R3_w=inv14"}, +@@ -161,13 +161,13 @@ static struct bpf_align_test tests[] = { + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { +- {7, "R0_w=pkt(id=0,off=8,r=8,imm=0)"}, ++ {6, "R0_w=pkt(id=0,off=8,r=8,imm=0)"}, + {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, +- {18, "R3=pkt_end(id=0,off=0,imm=0)"}, ++ {13, "R3_w=pkt_end(id=0,off=0,imm=0)"}, + {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, + {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, +@@ -234,10 +234,10 @@ static struct bpf_align_test tests[] = { + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { +- {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, ++ {3, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, + {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"}, + {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"}, +- {10, "R2=pkt(id=0,off=0,r=18,imm=0)"}, ++ {9, "R2=pkt(id=0,off=0,r=18,imm=0)"}, + {10, "R5=pkt(id=0,off=14,r=18,imm=0)"}, + {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, +@@ -296,7 +296,7 @@ static struct bpf_align_test tests[] = { + /* Calculated offset in R6 has unknown value, but known + * alignment of 4. + */ +- {8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, ++ {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* Offset is added to packet pointer R5, resulting in + * known fixed offset, and variable offset from R6. +@@ -386,7 +386,7 @@ static struct bpf_align_test tests[] = { + /* Calculated offset in R6 has unknown value, but known + * alignment of 4. 
+ */ +- {8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, ++ {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* Adding 14 makes R6 be (4n+2) */ + {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, +@@ -458,7 +458,7 @@ static struct bpf_align_test tests[] = { + /* Checked s>=0 */ + {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + /* packet pointer + nonnegative (4n+2) */ +- {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, ++ {12, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. + * We checked the bounds, but it might have been able diff --git a/patches.suse/bpf-Pass-a-set-of-bpf_core_relo-s-to-prog_load-comma.patch b/patches.suse/bpf-Pass-a-set-of-bpf_core_relo-s-to-prog_load-comma.patch new file mode 100644 index 0000000..110acb2 --- /dev/null +++ b/patches.suse/bpf-Pass-a-set-of-bpf_core_relo-s-to-prog_load-comma.patch @@ -0,0 +1,393 @@ +From: Alexei Starovoitov +Date: Wed, 1 Dec 2021 10:10:28 -0800 +Subject: bpf: Pass a set of bpf_core_relo-s to prog_load command. +Patch-mainline: v5.17-rc1 +Git-commit: fbd94c7afcf99c9f3b1ba1168657ecc428eb2c8d +References: jsc#PED-1368 + +struct bpf_core_relo is generated by llvm and processed by libbpf. +It's a de-facto uapi. +With CO-RE in the kernel the struct bpf_core_relo becomes uapi de-jure. +Add an ability to pass a set of 'struct bpf_core_relo' to prog_load command +and let the kernel perform CO-RE relocations. + +Note the struct bpf_line_info and struct bpf_func_info have the same +layout when passed from LLVM to libbpf and from libbpf to the kernel +except "insn_off" fields means "byte offset" when LLVM generates it. +Then libbpf converts it to "insn index" to pass to the kernel. +The struct bpf_core_relo's "insn_off" field is always "byte offset". 
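+
+As a minimal userspace sketch (not part of this patch; bpf() stands in
+for a syscall(__NR_bpf, ...) wrapper, and insns/relos are arrays the
+loader has already prepared), the new union bpf_attr fields added below
+are filled in like any other prog_load attribute:
+
+  union bpf_attr attr = {};
+
+  attr.prog_type = BPF_PROG_TYPE_SCHED_CLS;
+  attr.insns = (__u64)(unsigned long)insns;
+  attr.insn_cnt = insn_cnt;
+  attr.license = (__u64)(unsigned long)"GPL";
+  /* new: hand the CO-RE relocation records to the kernel */
+  attr.core_relos = (__u64)(unsigned long)relos;
+  attr.core_relo_cnt = relo_cnt;
+  /* rec_size lets older/newer loaders and kernels interoperate */
+  attr.core_relo_rec_size = sizeof(struct bpf_core_relo);
+
+  prog_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));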
+ +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-6-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + include/linux/bpf.h | 8 ++++ + include/uapi/linux/bpf.h | 59 +++++++++++++++++++++++++- + kernel/bpf/btf.c | 6 +++ + kernel/bpf/syscall.c | 2 +- + kernel/bpf/verifier.c | 76 ++++++++++++++++++++++++++++++++++ + tools/include/uapi/linux/bpf.h | 59 +++++++++++++++++++++++++- + tools/lib/bpf/relo_core.h | 53 ------------------------ + 7 files changed, 207 insertions(+), 56 deletions(-) + +diff --git a/include/linux/bpf.h b/include/linux/bpf.h +index cad0829710be..8bbf08fbab66 100644 +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -1732,6 +1732,14 @@ bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); + const struct btf_func_model * + bpf_jit_find_kfunc_model(const struct bpf_prog *prog, + const struct bpf_insn *insn); ++struct bpf_core_ctx { ++ struct bpf_verifier_log *log; ++ const struct btf *btf; ++}; ++ ++int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, ++ int relo_idx, void *insn); ++ + #else /* !CONFIG_BPF_SYSCALL */ + static inline struct bpf_prog *bpf_prog_get(u32 ufd) + { +diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h +index 9e66b1880020..c26871263f1f 100644 +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -1342,8 +1342,10 @@ union bpf_attr { + /* or valid module BTF object fd or 0 to attach to vmlinux */ + __u32 attach_btf_obj_fd; + }; +- __u32 :32; /* pad */ ++ __u32 core_relo_cnt; /* number of bpf_core_relo */ + __aligned_u64 fd_array; /* array of FDs */ ++ __aligned_u64 core_relos; ++ __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ + }; + + struct { /* anonymous struct used by BPF_OBJ_* commands */ +@@ -6393,4 +6395,59 @@ enum bpf_core_relo_kind { + BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ + }; + ++/* ++ * "struct bpf_core_relo" is used to pass relocation data form LLVM to libbpf ++ * and from libbpf to the kernel. ++ * ++ * CO-RE relocation captures the following data: ++ * - insn_off - instruction offset (in bytes) within a BPF program that needs ++ * its insn->imm field to be relocated with actual field info; ++ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable ++ * type or field; ++ * - access_str_off - offset into corresponding .BTF string section. String ++ * interpretation depends on specific relocation kind: ++ * - for field-based relocations, string encodes an accessed field using ++ * a sequence of field and array indices, separated by colon (:). It's ++ * conceptually very close to LLVM's getelementptr ([0]) instruction's ++ * arguments for identifying offset to a field. ++ * - for type-based relocations, strings is expected to be just "0"; ++ * - for enum value-based relocations, string contains an index of enum ++ * value within its enum type; ++ * - kind - one of enum bpf_core_relo_kind; ++ * ++ * Example: ++ * struct sample { ++ * int a; ++ * struct { ++ * int b[10]; ++ * }; ++ * }; ++ * ++ * struct sample *s = ...; ++ * int *x = &s->a; // encoded as "0:0" (a is field #0) ++ * int *y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, ++ * // b is field #0 inside anon struct, accessing elem #5) ++ * int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) ++ * ++ * type_id for all relocs in this example will capture BTF type id of ++ * `struct sample`. 
++ * ++ * Such relocation is emitted when using __builtin_preserve_access_index() ++ * Clang built-in, passing expression that captures field address, e.g.: ++ * ++ * bpf_probe_read(&dst, sizeof(dst), ++ * __builtin_preserve_access_index(&src->a.b.c)); ++ * ++ * In this case Clang will emit field relocation recording necessary data to ++ * be able to find offset of embedded `a.b.c` field within `src` struct. ++ * ++ * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction ++ */ ++struct bpf_core_relo { ++ __u32 insn_off; ++ __u32 type_id; ++ __u32 access_str_off; ++ enum bpf_core_relo_kind kind; ++}; ++ + #endif /* _UAPI__LINUX_BPF_H__ */ +diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c +index c79595aad55b..0d070461e2b8 100644 +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -6439,3 +6439,9 @@ size_t bpf_core_essential_name_len(const char *name) + } + return n; + } ++ ++int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, ++ int relo_idx, void *insn) ++{ ++ return -EOPNOTSUPP; ++} +diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c +index 47089d1d67a4..b3ada4085f85 100644 +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -2184,7 +2184,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) + } + + /* last field in 'union bpf_attr' used by this command */ +-#define BPF_PROG_LOAD_LAST_FIELD fd_array ++#define BPF_PROG_LOAD_LAST_FIELD core_relo_rec_size + + static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) + { +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 6c9c0d9a04a0..6522ffdea487 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -10273,6 +10273,78 @@ err_free: + return err; + } + ++#define MIN_CORE_RELO_SIZE sizeof(struct bpf_core_relo) ++#define MAX_CORE_RELO_SIZE MAX_FUNCINFO_REC_SIZE ++ ++static int check_core_relo(struct bpf_verifier_env *env, ++ const union bpf_attr *attr, ++ bpfptr_t uattr) ++{ ++ u32 i, nr_core_relo, ncopy, expected_size, rec_size; ++ struct bpf_core_relo core_relo = {}; ++ struct bpf_prog *prog = env->prog; ++ const struct btf *btf = prog->aux->btf; ++ struct bpf_core_ctx ctx = { ++ .log = &env->log, ++ .btf = btf, ++ }; ++ bpfptr_t u_core_relo; ++ int err; ++ ++ nr_core_relo = attr->core_relo_cnt; ++ if (!nr_core_relo) ++ return 0; ++ if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo)) ++ return -EINVAL; ++ ++ rec_size = attr->core_relo_rec_size; ++ if (rec_size < MIN_CORE_RELO_SIZE || ++ rec_size > MAX_CORE_RELO_SIZE || ++ rec_size % sizeof(u32)) ++ return -EINVAL; ++ ++ u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel); ++ expected_size = sizeof(struct bpf_core_relo); ++ ncopy = min_t(u32, expected_size, rec_size); ++ ++ /* Unlike func_info and line_info, copy and apply each CO-RE ++ * relocation record one at a time. 
++ */ ++ for (i = 0; i < nr_core_relo; i++) { ++ /* future proofing when sizeof(bpf_core_relo) changes */ ++ err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size); ++ if (err) { ++ if (err == -E2BIG) { ++ verbose(env, "nonzero tailing record in core_relo"); ++ if (copy_to_bpfptr_offset(uattr, ++ offsetof(union bpf_attr, core_relo_rec_size), ++ &expected_size, sizeof(expected_size))) ++ err = -EFAULT; ++ } ++ break; ++ } ++ ++ if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) { ++ err = -EFAULT; ++ break; ++ } ++ ++ if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) { ++ verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n", ++ i, core_relo.insn_off, prog->len); ++ err = -EINVAL; ++ break; ++ } ++ ++ err = bpf_core_apply(&ctx, &core_relo, i, ++ &prog->insnsi[core_relo.insn_off / 8]); ++ if (err) ++ break; ++ bpfptr_add(&u_core_relo, rec_size); ++ } ++ return err; ++} ++ + static int check_btf_info(struct bpf_verifier_env *env, + const union bpf_attr *attr, + bpfptr_t uattr) +@@ -10303,6 +10375,10 @@ static int check_btf_info(struct bpf_verifier_env *env, + if (err) + return err; + ++ err = check_core_relo(env, attr, uattr); ++ if (err) ++ return err; ++ + return 0; + } + +diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h +index 9e66b1880020..c26871263f1f 100644 +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -1342,8 +1342,10 @@ union bpf_attr { + /* or valid module BTF object fd or 0 to attach to vmlinux */ + __u32 attach_btf_obj_fd; + }; +- __u32 :32; /* pad */ ++ __u32 core_relo_cnt; /* number of bpf_core_relo */ + __aligned_u64 fd_array; /* array of FDs */ ++ __aligned_u64 core_relos; ++ __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ + }; + + struct { /* anonymous struct used by BPF_OBJ_* commands */ +@@ -6393,4 +6395,59 @@ enum bpf_core_relo_kind { + BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ + }; + ++/* ++ * "struct bpf_core_relo" is used to pass relocation data form LLVM to libbpf ++ * and from libbpf to the kernel. ++ * ++ * CO-RE relocation captures the following data: ++ * - insn_off - instruction offset (in bytes) within a BPF program that needs ++ * its insn->imm field to be relocated with actual field info; ++ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable ++ * type or field; ++ * - access_str_off - offset into corresponding .BTF string section. String ++ * interpretation depends on specific relocation kind: ++ * - for field-based relocations, string encodes an accessed field using ++ * a sequence of field and array indices, separated by colon (:). It's ++ * conceptually very close to LLVM's getelementptr ([0]) instruction's ++ * arguments for identifying offset to a field. ++ * - for type-based relocations, strings is expected to be just "0"; ++ * - for enum value-based relocations, string contains an index of enum ++ * value within its enum type; ++ * - kind - one of enum bpf_core_relo_kind; ++ * ++ * Example: ++ * struct sample { ++ * int a; ++ * struct { ++ * int b[10]; ++ * }; ++ * }; ++ * ++ * struct sample *s = ...; ++ * int *x = &s->a; // encoded as "0:0" (a is field #0) ++ * int *y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, ++ * // b is field #0 inside anon struct, accessing elem #5) ++ * int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) ++ * ++ * type_id for all relocs in this example will capture BTF type id of ++ * `struct sample`. 
++ * ++ * Such relocation is emitted when using __builtin_preserve_access_index() ++ * Clang built-in, passing expression that captures field address, e.g.: ++ * ++ * bpf_probe_read(&dst, sizeof(dst), ++ * __builtin_preserve_access_index(&src->a.b.c)); ++ * ++ * In this case Clang will emit field relocation recording necessary data to ++ * be able to find offset of embedded `a.b.c` field within `src` struct. ++ * ++ * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction ++ */ ++struct bpf_core_relo { ++ __u32 insn_off; ++ __u32 type_id; ++ __u32 access_str_off; ++ enum bpf_core_relo_kind kind; ++}; ++ + #endif /* _UAPI__LINUX_BPF_H__ */ +diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h +index 3d0b86e7f439..f410691cc4e5 100644 +--- a/tools/lib/bpf/relo_core.h ++++ b/tools/lib/bpf/relo_core.h +@@ -6,59 +6,6 @@ + + #include + +-/* The minimum bpf_core_relo checked by the loader +- * +- * CO-RE relocation captures the following data: +- * - insn_off - instruction offset (in bytes) within a BPF program that needs +- * its insn->imm field to be relocated with actual field info; +- * - type_id - BTF type ID of the "root" (containing) entity of a relocatable +- * type or field; +- * - access_str_off - offset into corresponding .BTF string section. String +- * interpretation depends on specific relocation kind: +- * - for field-based relocations, string encodes an accessed field using +- * a sequence of field and array indices, separated by colon (:). It's +- * conceptually very close to LLVM's getelementptr ([0]) instruction's +- * arguments for identifying offset to a field. +- * - for type-based relocations, strings is expected to be just "0"; +- * - for enum value-based relocations, string contains an index of enum +- * value within its enum type; +- * +- * Example to provide a better feel. +- * +- * struct sample { +- * int a; +- * struct { +- * int b[10]; +- * }; +- * }; +- * +- * struct sample *s = ...; +- * int x = &s->a; // encoded as "0:0" (a is field #0) +- * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, +- * // b is field #0 inside anon struct, accessing elem #5) +- * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) +- * +- * type_id for all relocs in this example will capture BTF type id of +- * `struct sample`. +- * +- * Such relocation is emitted when using __builtin_preserve_access_index() +- * Clang built-in, passing expression that captures field address, e.g.: +- * +- * bpf_probe_read(&dst, sizeof(dst), +- * __builtin_preserve_access_index(&src->a.b.c)); +- * +- * In this case Clang will emit field relocation recording necessary data to +- * be able to find offset of embedded `a.b.c` field within `src` struct. +- * +- * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction +- */ +-struct bpf_core_relo { +- __u32 insn_off; +- __u32 type_id; +- __u32 access_str_off; +- enum bpf_core_relo_kind kind; +-}; +- + struct bpf_core_cand { + const struct btf *btf; + const struct btf_type *t; +-- +2.38.1 + diff --git a/patches.suse/bpf-Prepare-relo_core.c-for-kernel-duty.patch b/patches.suse/bpf-Prepare-relo_core.c-for-kernel-duty.patch new file mode 100644 index 0000000..7a73ed2 --- /dev/null +++ b/patches.suse/bpf-Prepare-relo_core.c-for-kernel-duty.patch @@ -0,0 +1,337 @@ +From: Alexei Starovoitov +Date: Wed, 1 Dec 2021 10:10:26 -0800 +Subject: bpf: Prepare relo_core.c for kernel duty. 
+Patch-mainline: v5.17-rc1 +Git-commit: 29db4bea1d10b73749d7992c1fc9ac13499e8871 +References: jsc#PED-1368 +X-info: modified context in kernel/bpf/btf.c due to d9847eb8be3d "bpf: Make CONFIG_DEBUG_INFO_BTF depend upon CONFIG_BPF_SYSCALL" already applied + +Make relo_core.c to be compiled for the kernel and for user space libbpf. + +Note the patch is reducing BPF_CORE_SPEC_MAX_LEN from 64 to 32. +This is the maximum number of nested structs and arrays. +For example: + struct sample { + int a; + struct { + int b[10]; + }; + }; + + struct sample *s = ...; + int *y = &s->b[5]; +This field access is encoded as "0:1:0:5" and spec len is 4. + +The follow up patch might bump it back to 64. + +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-4-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + include/linux/btf.h | 81 ++++++++++++++++++++++++++++++++++++++++++++++ + kernel/bpf/Makefile | 4 ++ + kernel/bpf/btf.c | 26 ++++++++++++++ + tools/lib/bpf/relo_core.c | 76 ++++++++++++++++++++++++++++++++++++------- + 4 files changed, 176 insertions(+), 11 deletions(-) + +--- a/include/linux/btf.h ++++ b/include/linux/btf.h +@@ -144,6 +144,53 @@ static inline bool btf_type_is_enum(cons + return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; + } + ++static inline bool str_is_empty(const char *s) ++{ ++ return !s || !s[0]; ++} ++ ++static inline u16 btf_kind(const struct btf_type *t) ++{ ++ return BTF_INFO_KIND(t->info); ++} ++ ++static inline bool btf_is_enum(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_ENUM; ++} ++ ++static inline bool btf_is_composite(const struct btf_type *t) ++{ ++ u16 kind = btf_kind(t); ++ ++ return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; ++} ++ ++static inline bool btf_is_array(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_ARRAY; ++} ++ ++static inline bool btf_is_int(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_INT; ++} ++ ++static inline bool btf_is_ptr(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_PTR; ++} ++ ++static inline u8 btf_int_offset(const struct btf_type *t) ++{ ++ return BTF_INT_OFFSET(*(u32 *)(t + 1)); ++} ++ ++static inline u8 btf_int_encoding(const struct btf_type *t) ++{ ++ return BTF_INT_ENCODING(*(u32 *)(t + 1)); ++} ++ + static inline bool btf_type_is_scalar(const struct btf_type *t) + { + return btf_type_is_int(t) || btf_type_is_enum(t); +@@ -184,6 +231,11 @@ static inline u16 btf_type_vlen(const st + return BTF_INFO_VLEN(t->info); + } + ++static inline u16 btf_vlen(const struct btf_type *t) ++{ ++ return btf_type_vlen(t); ++} ++ + static inline u16 btf_func_linkage(const struct btf_type *t) + { + return BTF_INFO_VLEN(t->info); +@@ -208,11 +260,40 @@ static inline u32 __btf_member_bitfield_ + : 0; + } + ++static inline struct btf_member *btf_members(const struct btf_type *t) ++{ ++ return (struct btf_member *)(t + 1); ++} ++ ++static inline u32 btf_member_bit_offset(const struct btf_type *t, u32 member_idx) ++{ ++ const struct btf_member *m = btf_members(t) + member_idx; ++ ++ return __btf_member_bit_offset(t, m); ++} ++ ++static inline u32 btf_member_bitfield_size(const struct btf_type *t, u32 member_idx) ++{ ++ const struct btf_member *m = btf_members(t) + member_idx; ++ ++ return __btf_member_bitfield_size(t, m); ++} ++ + static inline const struct btf_member *btf_type_member(const struct btf_type *t) + { + return (const struct btf_member *)(t + 1); + } + ++static inline 
struct btf_array *btf_array(const struct btf_type *t) ++{ ++ return (struct btf_array *)(t + 1); ++} ++ ++static inline struct btf_enum *btf_enum(const struct btf_type *t) ++{ ++ return (struct btf_enum *)(t + 1); ++} ++ + static inline const struct btf_var_secinfo *btf_type_var_secinfo( + const struct btf_type *t) + { +--- a/kernel/bpf/Makefile ++++ b/kernel/bpf/Makefile +@@ -36,3 +36,7 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ + obj-${CONFIG_BPF_LSM} += bpf_lsm.o + endif + obj-$(CONFIG_BPF_PRELOAD) += preload/ ++ ++obj-$(CONFIG_BPF_SYSCALL) += relo_core.o ++$(obj)/relo_core.o: $(srctree)/tools/lib/bpf/relo_core.c FORCE ++ $(call if_changed_rule,cc_o_c) +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -6406,3 +6406,29 @@ DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfun + DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list); + + #endif ++ ++int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, ++ const struct btf *targ_btf, __u32 targ_id) ++{ ++ return -EOPNOTSUPP; ++} ++ ++static bool bpf_core_is_flavor_sep(const char *s) ++{ ++ /* check X___Y name pattern, where X and Y are not underscores */ ++ return s[0] != '_' && /* X */ ++ s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ ++ s[4] != '_'; /* Y */ ++} ++ ++size_t bpf_core_essential_name_len(const char *name) ++{ ++ size_t n = strlen(name); ++ int i; ++ ++ for (i = n - 5; i >= 0; i--) { ++ if (bpf_core_is_flavor_sep(name + i)) ++ return i + 1; ++ } ++ return n; ++} +--- a/tools/lib/bpf/relo_core.c ++++ b/tools/lib/bpf/relo_core.c +@@ -1,6 +1,60 @@ + // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + /* Copyright (c) 2019 Facebook */ + ++#ifdef __KERNEL__ ++#include ++#include ++#include ++#include ++#include "relo_core.h" ++ ++static const char *btf_kind_str(const struct btf_type *t) ++{ ++ return btf_type_str(t); ++} ++ ++static bool is_ldimm64_insn(struct bpf_insn *insn) ++{ ++ return insn->code == (BPF_LD | BPF_IMM | BPF_DW); ++} ++ ++static const struct btf_type * ++skip_mods_and_typedefs(const struct btf *btf, u32 id, u32 *res_id) ++{ ++ return btf_type_skip_modifiers(btf, id, res_id); ++} ++ ++static const char *btf__name_by_offset(const struct btf *btf, u32 offset) ++{ ++ return btf_name_by_offset(btf, offset); ++} ++ ++static s64 btf__resolve_size(const struct btf *btf, u32 type_id) ++{ ++ const struct btf_type *t; ++ int size; ++ ++ t = btf_type_by_id(btf, type_id); ++ t = btf_resolve_size(btf, t, &size); ++ if (IS_ERR(t)) ++ return PTR_ERR(t); ++ return size; ++} ++ ++enum libbpf_print_level { ++ LIBBPF_WARN, ++ LIBBPF_INFO, ++ LIBBPF_DEBUG, ++}; ++ ++#undef pr_warn ++#undef pr_info ++#undef pr_debug ++#define pr_warn(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__) ++#define pr_info(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__) ++#define pr_debug(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__) ++#define libbpf_print(level, fmt, ...) bpf_log((void *)prog_name, fmt, ##__VA_ARGS__) ++#else + #include + #include + #include +@@ -12,8 +66,9 @@ + #include "btf.h" + #include "str_error.h" + #include "libbpf_internal.h" ++#endif + +-#define BPF_CORE_SPEC_MAX_LEN 64 ++#define BPF_CORE_SPEC_MAX_LEN 32 + + /* represents BPF CO-RE field or array element accessor */ + struct bpf_core_accessor { +@@ -150,7 +205,7 @@ static bool core_relo_is_enumval_based(e + * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access + * string to specify enumerator's value index that need to be relocated. 
+ */ +-static int bpf_core_parse_spec(const struct btf *btf, ++static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, + __u32 type_id, + const char *spec_str, + enum bpf_core_relo_kind relo_kind, +@@ -272,8 +327,8 @@ static int bpf_core_parse_spec(const str + return sz; + spec->bit_offset += access_idx * sz * 8; + } else { +- pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", +- type_id, spec_str, i, id, btf_kind_str(t)); ++ pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", ++ prog_name, type_id, spec_str, i, id, btf_kind_str(t)); + return -EINVAL; + } + } +@@ -346,8 +401,6 @@ recur: + targ_id = btf_array(targ_type)->type; + goto recur; + default: +- pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", +- btf_kind(local_type), local_id, targ_id); + return 0; + } + } +@@ -1045,7 +1098,7 @@ poison: + * [] () + => @, + * where is a C-syntax view of recorded field access, e.g.: x.a[3].b + */ +-static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) ++static void bpf_core_dump_spec(const char *prog_name, int level, const struct bpf_core_spec *spec) + { + const struct btf_type *t; + const struct btf_enum *e; +@@ -1167,7 +1220,8 @@ int bpf_core_apply_relo_insn(const char + if (str_is_empty(spec_str)) + return -EINVAL; + +- err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); ++ err = bpf_core_parse_spec(prog_name, local_btf, local_id, spec_str, ++ relo->kind, &local_spec); + if (err) { + pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", + prog_name, relo_idx, local_id, btf_kind_str(local_type), +@@ -1178,7 +1232,7 @@ int bpf_core_apply_relo_insn(const char + + pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, + relo_idx, core_relo_kind_str(relo->kind), relo->kind); +- bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); ++ bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, &local_spec); + libbpf_print(LIBBPF_DEBUG, "\n"); + + /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ +@@ -1204,14 +1258,14 @@ int bpf_core_apply_relo_insn(const char + if (err < 0) { + pr_warn("prog '%s': relo #%d: error matching candidate #%d ", + prog_name, relo_idx, i); +- bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); ++ bpf_core_dump_spec(prog_name, LIBBPF_WARN, &cand_spec); + libbpf_print(LIBBPF_WARN, ": %d\n", err); + return err; + } + + pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, + relo_idx, err == 0 ? "non-matching" : "matching", i); +- bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); ++ bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, &cand_spec); + libbpf_print(LIBBPF_DEBUG, "\n"); + + if (err == 0) diff --git a/patches.suse/bpf-Remove-a-redundant-comment-on-bpf_prog_free.patch b/patches.suse/bpf-Remove-a-redundant-comment-on-bpf_prog_free.patch new file mode 100644 index 0000000..b483748 --- /dev/null +++ b/patches.suse/bpf-Remove-a-redundant-comment-on-bpf_prog_free.patch @@ -0,0 +1,29 @@ +From: Christoph Hellwig +Date: Fri, 19 Nov 2021 17:32:12 +0100 +Subject: bpf: Remove a redundant comment on bpf_prog_free +Patch-mainline: v5.17-rc1 +Git-commit: ccb00292eb2dbb58a55850639356d07630cd3c46 +References: jsc#PED-1368 + +The comment telling that the prog_free helper is freeing the program is +not exactly useful, so just remove it. 
+ +Signed-off-by: Christoph Hellwig +Signed-off-by: Alexei Starovoitov +Acked-by: Song Liu +Link: https://lore.kernel.org/bpf/20211119163215.971383-3-hch@lst.de +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/core.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -2301,7 +2301,6 @@ static void bpf_prog_free_deferred(struc + } + } + +-/* Free internal BPF program */ + void bpf_prog_free(struct bpf_prog *fp) + { + struct bpf_prog_aux *aux = fp->aux; diff --git a/patches.suse/bpf-Remove-redundant-assignment-to-pointer-t.patch b/patches.suse/bpf-Remove-redundant-assignment-to-pointer-t.patch new file mode 100644 index 0000000..b812530 --- /dev/null +++ b/patches.suse/bpf-Remove-redundant-assignment-to-pointer-t.patch @@ -0,0 +1,30 @@ +From: Colin Ian King +Date: Tue, 7 Dec 2021 22:47:18 +0000 +Subject: bpf: Remove redundant assignment to pointer t +Patch-mainline: v5.17-rc1 +Git-commit: 73b6eae583f44e278e19489a411f9c1e22d530fc +References: jsc#PED-1368 + +The pointer t is being initialized with a value that is never read. The +pointer is re-assigned a value a littler later on, hence the initialization +is redundant and can be removed. + +Signed-off-by: Colin Ian King +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211207224718.59593-1-colin.i.king@gmail.com +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/btf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -837,7 +837,7 @@ static const char *btf_show_name(struct + const char *ptr_suffix = &ptr_suffixes[strlen(ptr_suffixes)]; + const char *name = NULL, *prefix = "", *parens = ""; + const struct btf_member *m = show->state.member; +- const struct btf_type *t = show->state.type; ++ const struct btf_type *t; + const struct btf_array *array; + u32 id = show->state.type_id; + const char *member = NULL; diff --git a/patches.suse/bpf-Remove-the-cgroup-bpf-header-dependecy.patch b/patches.suse/bpf-Remove-the-cgroup-bpf-header-dependecy.patch new file mode 100644 index 0000000..e94b44a --- /dev/null +++ b/patches.suse/bpf-Remove-the-cgroup-bpf-header-dependecy.patch @@ -0,0 +1,218 @@ +From: Jakub Kicinski +Date: Wed, 15 Dec 2021 18:55:38 -0800 +Subject: bpf: Remove the cgroup -> bpf header dependecy +Patch-mainline: v5.17-rc1 +Git-commit: fd1740b6abac39f68ce12e201697f106e0f1d519 +References: jsc#PED-1368 + +Remove the dependency from cgroup-defs.h to bpf-cgroup.h and bpf.h. +This reduces the incremental build size of x86 allmodconfig after +bpf.h was touched from ~17k objects rebuilt to ~5k objects. +bpf.h is 2.2kLoC and is modified relatively often. + +We need a new header with just the definition of struct cgroup_bpf +and enum cgroup_bpf_attach_type, this is akin to cgroup-defs.h. 
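+
+The split follows the usual -defs.h pattern. A hypothetical sketch
+(foo-defs.h/foo.h are illustrations, not files from this patch): the
+defs header carries only the type definitions plus their minimal
+includes, the API header keeps the declarations and heavy includes, and
+struct embedders include only the former:
+
+  /* foo-defs.h: types only, cheap to include, rarely modified */
+  #ifndef _FOO_DEFS_H
+  #define _FOO_DEFS_H
+  #include <linux/workqueue.h>
+
+  struct foo_state {
+          struct work_struct release_work;
+  };
+  #endif
+
+  /* foo.h: full API, pulled in only by code that calls it */
+  #ifndef _FOO_H
+  #define _FOO_H
+  #include <linux/foo-defs.h>
+
+  int foo_attach(struct foo_state *st);
+  #endif
+
+With that, touching foo.h no longer rebuilds everything that merely
+embeds a struct foo_state, which is where the ~17k -> ~5k object win
+above comes from.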
+ +Signed-off-by: Jakub Kicinski +Signed-off-by: Alexei Starovoitov +Acked-by: Tejun Heo +Link: https://lore.kernel.org/bpf/20211216025538.1649516-4-kuba@kernel.org +Acked-by: Shung-Hsi Yu +--- + include/linux/bpf-cgroup-defs.h | 70 ++++++++++++++++++++++++++++++++++++++++ + include/linux/bpf-cgroup.h | 57 -------------------------------- + include/linux/cgroup-defs.h | 2 - + kernel/bpf/core.c | 2 - + 4 files changed, 73 insertions(+), 58 deletions(-) + create mode 100644 include/linux/bpf-cgroup-defs.h + +--- /dev/null ++++ b/include/linux/bpf-cgroup-defs.h +@@ -0,0 +1,70 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _BPF_CGROUP_DEFS_H ++#define _BPF_CGROUP_DEFS_H ++ ++#ifdef CONFIG_CGROUP_BPF ++ ++#include ++#include ++#include ++ ++struct bpf_prog_array; ++ ++enum cgroup_bpf_attach_type { ++ CGROUP_BPF_ATTACH_TYPE_INVALID = -1, ++ CGROUP_INET_INGRESS = 0, ++ CGROUP_INET_EGRESS, ++ CGROUP_INET_SOCK_CREATE, ++ CGROUP_SOCK_OPS, ++ CGROUP_DEVICE, ++ CGROUP_INET4_BIND, ++ CGROUP_INET6_BIND, ++ CGROUP_INET4_CONNECT, ++ CGROUP_INET6_CONNECT, ++ CGROUP_INET4_POST_BIND, ++ CGROUP_INET6_POST_BIND, ++ CGROUP_UDP4_SENDMSG, ++ CGROUP_UDP6_SENDMSG, ++ CGROUP_SYSCTL, ++ CGROUP_UDP4_RECVMSG, ++ CGROUP_UDP6_RECVMSG, ++ CGROUP_GETSOCKOPT, ++ CGROUP_SETSOCKOPT, ++ CGROUP_INET4_GETPEERNAME, ++ CGROUP_INET6_GETPEERNAME, ++ CGROUP_INET4_GETSOCKNAME, ++ CGROUP_INET6_GETSOCKNAME, ++ CGROUP_INET_SOCK_RELEASE, ++ MAX_CGROUP_BPF_ATTACH_TYPE ++}; ++ ++struct cgroup_bpf { ++ /* array of effective progs in this cgroup */ ++ struct bpf_prog_array __rcu *effective[MAX_CGROUP_BPF_ATTACH_TYPE]; ++ ++ /* attached progs to this cgroup and attach flags ++ * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will ++ * have either zero or one element ++ * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS ++ */ ++ struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE]; ++ u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE]; ++ ++ /* list of cgroup shared storages */ ++ struct list_head storages; ++ ++ /* temp storage for effective prog array used by prog_attach/detach */ ++ struct bpf_prog_array *inactive; ++ ++ /* reference counter used to detach bpf programs after cgroup removal */ ++ struct percpu_ref refcnt; ++ ++ /* cgroup_bpf is released using a work queue */ ++ struct work_struct release_work; ++}; ++ ++#else /* CONFIG_CGROUP_BPF */ ++struct cgroup_bpf {}; ++#endif /* CONFIG_CGROUP_BPF */ ++ ++#endif +--- a/include/linux/bpf-cgroup.h ++++ b/include/linux/bpf-cgroup.h +@@ -3,10 +3,10 @@ + #define _BPF_CGROUP_H + + #include ++#include + #include + #include + #include +-#include + #include + #include + +@@ -23,33 +23,6 @@ struct ctl_table_header; + struct task_struct; + + #ifdef CONFIG_CGROUP_BPF +-enum cgroup_bpf_attach_type { +- CGROUP_BPF_ATTACH_TYPE_INVALID = -1, +- CGROUP_INET_INGRESS = 0, +- CGROUP_INET_EGRESS, +- CGROUP_INET_SOCK_CREATE, +- CGROUP_SOCK_OPS, +- CGROUP_DEVICE, +- CGROUP_INET4_BIND, +- CGROUP_INET6_BIND, +- CGROUP_INET4_CONNECT, +- CGROUP_INET6_CONNECT, +- CGROUP_INET4_POST_BIND, +- CGROUP_INET6_POST_BIND, +- CGROUP_UDP4_SENDMSG, +- CGROUP_UDP6_SENDMSG, +- CGROUP_SYSCTL, +- CGROUP_UDP4_RECVMSG, +- CGROUP_UDP6_RECVMSG, +- CGROUP_GETSOCKOPT, +- CGROUP_SETSOCKOPT, +- CGROUP_INET4_GETPEERNAME, +- CGROUP_INET6_GETPEERNAME, +- CGROUP_INET4_GETSOCKNAME, +- CGROUP_INET6_GETSOCKNAME, +- CGROUP_INET_SOCK_RELEASE, +- MAX_CGROUP_BPF_ATTACH_TYPE +-}; + + #define CGROUP_ATYPE(type) \ + case BPF_##type: return type +@@ -127,33 +100,6 @@ struct bpf_prog_list { + struct bpf_cgroup_storage 
*storage[MAX_BPF_CGROUP_STORAGE_TYPE]; + }; + +-struct bpf_prog_array; +- +-struct cgroup_bpf { +- /* array of effective progs in this cgroup */ +- struct bpf_prog_array __rcu *effective[MAX_CGROUP_BPF_ATTACH_TYPE]; +- +- /* attached progs to this cgroup and attach flags +- * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will +- * have either zero or one element +- * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS +- */ +- struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE]; +- u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE]; +- +- /* list of cgroup shared storages */ +- struct list_head storages; +- +- /* temp storage for effective prog array used by prog_attach/detach */ +- struct bpf_prog_array *inactive; +- +- /* reference counter used to detach bpf programs after cgroup removal */ +- struct percpu_ref refcnt; +- +- /* cgroup_bpf is released using a work queue */ +- struct work_struct release_work; +-}; +- + int cgroup_bpf_inherit(struct cgroup *cgrp); + void cgroup_bpf_offline(struct cgroup *cgrp); + +@@ -471,7 +417,6 @@ int cgroup_bpf_prog_query(const union bp + union bpf_attr __user *uattr); + #else + +-struct cgroup_bpf {}; + static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } + static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} + +--- a/include/linux/cgroup-defs.h ++++ b/include/linux/cgroup-defs.h +@@ -19,7 +19,7 @@ + #include + #include + #include +-#include ++#include + #include + + #ifdef CONFIG_CGROUPS +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -18,12 +18,12 @@ + */ + + #include ++#include + #include + #include + #include + #include + #include +-#include + #include + #include + #include diff --git a/patches.suse/bpf-Rename-btf_member-accessors.patch b/patches.suse/bpf-Rename-btf_member-accessors.patch new file mode 100644 index 0000000..2e1394b --- /dev/null +++ b/patches.suse/bpf-Rename-btf_member-accessors.patch @@ -0,0 +1,163 @@ +From: Alexei Starovoitov +Date: Wed, 1 Dec 2021 10:10:25 -0800 +Subject: bpf: Rename btf_member accessors. +Patch-mainline: v5.17-rc1 +Git-commit: 8293eb995f349aed28006792cad4cb48091919dd +References: jsc#PED-1368 + +Rename btf_member_bit_offset() and btf_member_bitfield_size() to +avoid conflicts with similarly named helpers in libbpf's btf.h. +Rename the kernel helpers, since libbpf helpers are part of uapi. + +Suggested-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-3-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + include/linux/btf.h | 8 ++++---- + kernel/bpf/bpf_struct_ops.c | 6 +++--- + kernel/bpf/btf.c | 18 +++++++++--------- + net/ipv4/bpf_tcp_ca.c | 6 +++--- + 4 files changed, 19 insertions(+), 19 deletions(-) + +--- a/include/linux/btf.h ++++ b/include/linux/btf.h +@@ -194,15 +194,15 @@ static inline bool btf_type_kflag(const + return BTF_INFO_KFLAG(t->info); + } + +-static inline u32 btf_member_bit_offset(const struct btf_type *struct_type, +- const struct btf_member *member) ++static inline u32 __btf_member_bit_offset(const struct btf_type *struct_type, ++ const struct btf_member *member) + { + return btf_type_kflag(struct_type) ? 
BTF_MEMBER_BIT_OFFSET(member->offset) + : member->offset; + } + +-static inline u32 btf_member_bitfield_size(const struct btf_type *struct_type, +- const struct btf_member *member) ++static inline u32 __btf_member_bitfield_size(const struct btf_type *struct_type, ++ const struct btf_member *member) + { + return btf_type_kflag(struct_type) ? BTF_MEMBER_BITFIELD_SIZE(member->offset) + : 0; +--- a/kernel/bpf/bpf_struct_ops.c ++++ b/kernel/bpf/bpf_struct_ops.c +@@ -165,7 +165,7 @@ void bpf_struct_ops_init(struct btf *btf + break; + } + +- if (btf_member_bitfield_size(t, member)) { ++ if (__btf_member_bitfield_size(t, member)) { + pr_warn("bit field member %s in struct %s is not supported\n", + mname, st_ops->name); + break; +@@ -296,7 +296,7 @@ static int check_zero_holes(const struct + const struct btf_type *mtype; + + for_each_member(i, t, member) { +- moff = btf_member_bit_offset(t, member) / 8; ++ moff = __btf_member_bit_offset(t, member) / 8; + if (moff > prev_mend && + memchr_inv(data + prev_mend, 0, moff - prev_mend)) + return -EINVAL; +@@ -387,7 +387,7 @@ static int bpf_struct_ops_map_update_ele + struct bpf_prog *prog; + u32 moff; + +- moff = btf_member_bit_offset(t, member) / 8; ++ moff = __btf_member_bit_offset(t, member) / 8; + ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL); + if (ptype == module_type) { + if (*(void **)(udata + moff)) +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -2969,7 +2969,7 @@ static s32 btf_struct_check_meta(struct + return -EINVAL; + } + +- offset = btf_member_bit_offset(t, member); ++ offset = __btf_member_bit_offset(t, member); + if (is_union && offset) { + btf_verifier_log_member(env, t, member, + "Invalid member bits_offset"); +@@ -3094,7 +3094,7 @@ static int btf_find_struct_field(const s + if (off != -ENOENT) + /* only one such field is allowed */ + return -E2BIG; +- off = btf_member_bit_offset(t, member); ++ off = __btf_member_bit_offset(t, member); + if (off % 8) + /* valid C code cannot generate such BTF */ + return -EINVAL; +@@ -3184,8 +3184,8 @@ static void __btf_struct_show(const stru + + btf_show_start_member(show, member); + +- member_offset = btf_member_bit_offset(t, member); +- bitfield_size = btf_member_bitfield_size(t, member); ++ member_offset = __btf_member_bit_offset(t, member); ++ bitfield_size = __btf_member_bitfield_size(t, member); + bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset); + bits8_offset = BITS_PER_BYTE_MASKED(member_offset); + if (bitfield_size) { +@@ -5060,7 +5060,7 @@ again: + if (array_elem->nelems != 0) + goto error; + +- moff = btf_member_bit_offset(t, member) / 8; ++ moff = __btf_member_bit_offset(t, member) / 8; + if (off < moff) + goto error; + +@@ -5083,14 +5083,14 @@ error: + + for_each_member(i, t, member) { + /* offset of the field in bytes */ +- moff = btf_member_bit_offset(t, member) / 8; ++ moff = __btf_member_bit_offset(t, member) / 8; + if (off + size <= moff) + /* won't find anything, field is already too far */ + break; + +- if (btf_member_bitfield_size(t, member)) { +- u32 end_bit = btf_member_bit_offset(t, member) + +- btf_member_bitfield_size(t, member); ++ if (__btf_member_bitfield_size(t, member)) { ++ u32 end_bit = __btf_member_bit_offset(t, member) + ++ __btf_member_bitfield_size(t, member); + + /* off <= moff instead of off == moff because clang + * does not generate a BTF member for anonymous +--- a/net/ipv4/bpf_tcp_ca.c ++++ b/net/ipv4/bpf_tcp_ca.c +@@ -169,7 +169,7 @@ static u32 prog_ops_moff(const struct bp + t = bpf_tcp_congestion_ops.type; + m = 
&btf_type_member(t)[midx]; + +- return btf_member_bit_offset(t, m) / 8; ++ return __btf_member_bit_offset(t, m) / 8; + } + + static const struct bpf_func_proto * +@@ -246,7 +246,7 @@ static int bpf_tcp_ca_init_member(const + utcp_ca = (const struct tcp_congestion_ops *)udata; + tcp_ca = (struct tcp_congestion_ops *)kdata; + +- moff = btf_member_bit_offset(t, member) / 8; ++ moff = __btf_member_bit_offset(t, member) / 8; + switch (moff) { + case offsetof(struct tcp_congestion_ops, flags): + if (utcp_ca->flags & ~TCP_CONG_MASK) +@@ -276,7 +276,7 @@ static int bpf_tcp_ca_init_member(const + static int bpf_tcp_ca_check_member(const struct btf_type *t, + const struct btf_member *member) + { +- if (is_unsupported(btf_member_bit_offset(t, member) / 8)) ++ if (is_unsupported(__btf_member_bit_offset(t, member) / 8)) + return -ENOTSUPP; + return 0; + } diff --git a/patches.suse/bpf-Replace-PTR_TO_XXX_OR_NULL-with-PTR_TO_XXX-PTR_M.patch b/patches.suse/bpf-Replace-PTR_TO_XXX_OR_NULL-with-PTR_TO_XXX-PTR_M.patch index 209b65c..2f4fb97 100644 --- a/patches.suse/bpf-Replace-PTR_TO_XXX_OR_NULL-with-PTR_TO_XXX-PTR_M.patch +++ b/patches.suse/bpf-Replace-PTR_TO_XXX_OR_NULL-with-PTR_TO_XXX-PTR_M.patch @@ -37,7 +37,7 @@ Acked-by: Shung-Hsi Yu --- a/include/linux/bpf.h +++ b/include/linux/bpf.h -@@ -462,18 +462,15 @@ enum bpf_reg_type { +@@ -465,18 +465,15 @@ enum bpf_reg_type { PTR_TO_CTX, /* reg points to bpf_context */ CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ PTR_TO_MAP_VALUE, /* reg points to map element value */ @@ -57,7 +57,7 @@ Acked-by: Shung-Hsi Yu PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ /* PTR_TO_BTF_ID points to a kernel struct that does not need -@@ -491,18 +488,21 @@ enum bpf_reg_type { +@@ -494,18 +491,21 @@ enum bpf_reg_type { * been checked for null. Used primarily to inform the verifier * an explicit null check is required for this struct. */ @@ -95,10 +95,10 @@ Acked-by: Shung-Hsi Yu /* Liveness marks, used for registers and spilled-regs (in stack slots). 
* Read marks propagate upwards until they find a write mark; they record that -@@ -474,6 +476,8 @@ struct bpf_verifier_env { - /* longest register parentage chain walked for liveness marking */ - u32 longest_mark_read_walk; - bpfptr_t fd_array; +@@ -484,6 +486,8 @@ struct bpf_verifier_env { + /* Same as scratched_regs but for stack slots */ + u64 scratched_stack_slots; + u32 prev_log_len, prev_insn_print_len; + /* buffer used in reg_type_str() to generate reg_type string */ + char type_str_buf[TYPE_STR_BUF_LEN]; }; @@ -106,7 +106,7 @@ Acked-by: Shung-Hsi Yu __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c -@@ -4800,10 +4800,13 @@ bool btf_ctx_access(int off, int size, e +@@ -4940,10 +4940,13 @@ bool btf_ctx_access(int off, int size, e /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */ for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; @@ -138,7 +138,7 @@ Acked-by: Shung-Hsi Yu --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c -@@ -439,18 +439,6 @@ static bool reg_type_not_null(enum bpf_r +@@ -442,18 +442,6 @@ static bool reg_type_not_null(enum bpf_r type == PTR_TO_SOCK_COMMON; } @@ -157,7 +157,7 @@ Acked-by: Shung-Hsi Yu static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) { return reg->type == PTR_TO_MAP_VALUE && -@@ -459,12 +447,9 @@ static bool reg_may_point_to_spin_lock(c +@@ -462,12 +450,9 @@ static bool reg_may_point_to_spin_lock(c static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) { @@ -173,7 +173,7 @@ Acked-by: Shung-Hsi Yu } static bool arg_type_may_be_refcounted(enum bpf_arg_type type) -@@ -534,39 +519,52 @@ static bool is_cmpxchg_insn(const struct +@@ -537,39 +522,52 @@ static bool is_cmpxchg_insn(const struct insn->imm == BPF_CMPXCHG; } @@ -259,7 +259,7 @@ Acked-by: Shung-Hsi Yu static char slot_type_char[] = { [STACK_INVALID] = '?', -@@ -617,7 +615,7 @@ static void print_verifier_state(struct +@@ -675,7 +673,7 @@ static void print_verifier_state(struct continue; verbose(env, " R%d", i); print_liveness(env, reg->live); @@ -268,7 +268,7 @@ Acked-by: Shung-Hsi Yu if (t == SCALAR_VALUE && reg->precise) verbose(env, "P"); if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && -@@ -625,9 +623,8 @@ static void print_verifier_state(struct +@@ -683,9 +681,8 @@ static void print_verifier_state(struct /* reg->off should be 0 for SCALAR_VALUE */ verbose(env, "%lld", reg->var_off.value + reg->off); } else { @@ -280,7 +280,7 @@ Acked-by: Shung-Hsi Yu verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id)); verbose(env, "(id=%d", reg->id); if (reg_type_may_be_refcounted_or_null(t)) -@@ -636,10 +633,9 @@ static void print_verifier_state(struct +@@ -694,10 +691,9 @@ static void print_verifier_state(struct verbose(env, ",off=%d", reg->off); if (type_is_pkt_pointer(t)) verbose(env, ",r=%d", reg->range); @@ -294,7 +294,7 @@ Acked-by: Shung-Hsi Yu verbose(env, ",ks=%d,vs=%d", reg->map_ptr->key_size, reg->map_ptr->value_size); -@@ -709,7 +705,7 @@ static void print_verifier_state(struct +@@ -769,7 +765,7 @@ static void print_verifier_state(struct if (is_spilled_reg(&state->stack[i])) { reg = &state->stack[i].spilled_ptr; t = reg->type; @@ -303,7 +303,7 @@ Acked-by: Shung-Hsi Yu if (t == SCALAR_VALUE && reg->precise) verbose(env, "P"); if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) -@@ -1122,8 +1118,7 @@ static void mark_reg_known_zero(struct b +@@ -1202,8 +1198,7 @@ static void 
mark_reg_known_zero(struct b static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) { @@ -313,7 +313,7 @@ Acked-by: Shung-Hsi Yu const struct bpf_map *map = reg->map_ptr; if (map->inner_map_meta) { -@@ -1141,32 +1136,10 @@ static void mark_ptr_not_null_reg(struct +@@ -1222,32 +1217,10 @@ static void mark_ptr_not_null_reg(struct } else { reg->type = PTR_TO_MAP_VALUE; } @@ -349,7 +349,7 @@ Acked-by: Shung-Hsi Yu } static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg) -@@ -1888,7 +1861,7 @@ static int mark_reg_read(struct bpf_veri +@@ -2109,7 +2082,7 @@ static int mark_reg_read(struct bpf_veri break; if (parent->live & REG_LIVE_DONE) { verbose(env, "verifier BUG type %s var_off %lld off %d\n", @@ -358,7 +358,7 @@ Acked-by: Shung-Hsi Yu parent->var_off.value, parent->off); return -EFAULT; } -@@ -2546,9 +2519,8 @@ static int mark_chain_precision_stack(st +@@ -2774,9 +2747,8 @@ static int mark_chain_precision_stack(st static bool is_spillable_regtype(enum bpf_reg_type type) { @@ -369,7 +369,7 @@ Acked-by: Shung-Hsi Yu case PTR_TO_STACK: case PTR_TO_CTX: case PTR_TO_PACKET: -@@ -2557,21 +2529,14 @@ static bool is_spillable_regtype(enum bp +@@ -2785,21 +2757,14 @@ static bool is_spillable_regtype(enum bp case PTR_TO_FLOW_KEYS: case CONST_PTR_TO_MAP: case PTR_TO_SOCKET: @@ -391,7 +391,7 @@ Acked-by: Shung-Hsi Yu case PTR_TO_FUNC: case PTR_TO_MAP_KEY: return true; -@@ -3387,7 +3352,7 @@ static int check_ctx_access(struct bpf_v +@@ -3639,7 +3604,7 @@ static int check_ctx_access(struct bpf_v */ *reg_type = info.reg_type; @@ -400,7 +400,7 @@ Acked-by: Shung-Hsi Yu *btf = info.btf; *btf_id = info.btf_id; } else { -@@ -3455,7 +3420,7 @@ static int check_sock_access(struct bpf_ +@@ -3707,7 +3672,7 @@ static int check_sock_access(struct bpf_ } verbose(env, "R%d invalid %s access off=%d size=%d\n", @@ -409,7 +409,7 @@ Acked-by: Shung-Hsi Yu return -EACCES; } -@@ -4220,7 +4185,7 @@ static int check_mem_access(struct bpf_v +@@ -4472,7 +4437,7 @@ static int check_mem_access(struct bpf_v } else { mark_reg_known_zero(env, regs, value_regno); @@ -418,7 +418,7 @@ Acked-by: Shung-Hsi Yu regs[value_regno].id = ++env->id_gen; /* A load of ctx field could have different * actual load size with the one encoded in the -@@ -4228,8 +4193,7 @@ static int check_mem_access(struct bpf_v +@@ -4480,8 +4445,7 @@ static int check_mem_access(struct bpf_v * a sub-register. 
*/ regs[value_regno].subreg_def = DEF_NOT_SUBREG; @@ -428,7 +428,7 @@ Acked-by: Shung-Hsi Yu regs[value_regno].btf = btf; regs[value_regno].btf_id = btf_id; } -@@ -4282,7 +4246,7 @@ static int check_mem_access(struct bpf_v +@@ -4534,7 +4498,7 @@ static int check_mem_access(struct bpf_v } else if (type_is_sk_pointer(reg->type)) { if (t == BPF_WRITE) { verbose(env, "R%d cannot write into %s\n", @@ -437,7 +437,7 @@ Acked-by: Shung-Hsi Yu return -EACCES; } err = check_sock_access(env, insn_idx, regno, off, size, t); -@@ -4301,7 +4265,7 @@ static int check_mem_access(struct bpf_v +@@ -4553,7 +4517,7 @@ static int check_mem_access(struct bpf_v } else if (reg->type == PTR_TO_RDONLY_BUF) { if (t == BPF_WRITE) { verbose(env, "R%d cannot write into %s\n", @@ -446,7 +446,7 @@ Acked-by: Shung-Hsi Yu return -EACCES; } err = check_buffer_access(env, reg, regno, off, size, false, -@@ -4317,7 +4281,7 @@ static int check_mem_access(struct bpf_v +@@ -4569,7 +4533,7 @@ static int check_mem_access(struct bpf_v mark_reg_unknown(env, regs, value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, @@ -455,7 +455,7 @@ Acked-by: Shung-Hsi Yu return -EACCES; } -@@ -4391,7 +4355,7 @@ static int check_atomic(struct bpf_verif +@@ -4643,7 +4607,7 @@ static int check_atomic(struct bpf_verif is_sk_reg(env, insn->dst_reg)) { verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", insn->dst_reg, @@ -464,7 +464,7 @@ Acked-by: Shung-Hsi Yu return -EACCES; } -@@ -4617,9 +4581,9 @@ static int check_helper_mem_access(struc +@@ -4869,9 +4833,9 @@ static int check_helper_mem_access(struc register_is_null(reg)) return 0; @@ -477,7 +477,7 @@ Acked-by: Shung-Hsi Yu return -EACCES; } } -@@ -4630,7 +4594,7 @@ int check_mem_reg(struct bpf_verifier_en +@@ -4882,7 +4846,7 @@ int check_mem_reg(struct bpf_verifier_en if (register_is_null(reg)) return 0; @@ -486,7 +486,7 @@ Acked-by: Shung-Hsi Yu /* Assuming that the register contains a value check if the memory * access is safe. Temporarily save and restore the register's state as * the conversion shouldn't be visible to a caller. 
-@@ -4961,10 +4925,10 @@ static int check_reg_type(struct bpf_ver +@@ -5216,10 +5180,10 @@ static int check_reg_type(struct bpf_ver goto found; } @@ -500,7 +500,7 @@ Acked-by: Shung-Hsi Yu return -EACCES; found: -@@ -6182,6 +6146,7 @@ static int check_helper_call(struct bpf_ +@@ -6493,6 +6457,7 @@ static int check_helper_call(struct bpf_ { const struct bpf_func_proto *fn = NULL; enum bpf_return_type ret_type; @@ -508,7 +508,7 @@ Acked-by: Shung-Hsi Yu struct bpf_reg_state *regs; struct bpf_call_arg_meta meta; int insn_idx = *insn_idx_p; -@@ -6316,6 +6281,7 @@ static int check_helper_call(struct bpf_ +@@ -6633,6 +6598,7 @@ static int check_helper_call(struct bpf_ /* update return register (already marked as written above) */ ret_type = fn->ret_type; @@ -516,7 +516,7 @@ Acked-by: Shung-Hsi Yu if (ret_type == RET_INTEGER) { /* sets type to SCALAR_VALUE */ mark_reg_unknown(env, regs, BPF_REG_0); -@@ -6335,25 +6301,23 @@ static int check_helper_call(struct bpf_ +@@ -6652,25 +6618,23 @@ static int check_helper_call(struct bpf_ } regs[BPF_REG_0].map_ptr = meta.map_ptr; regs[BPF_REG_0].map_uid = meta.map_uid; @@ -550,7 +550,7 @@ Acked-by: Shung-Hsi Yu regs[BPF_REG_0].mem_size = meta.mem_size; } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) { const struct btf_type *t; -@@ -6373,14 +6337,10 @@ static int check_helper_call(struct bpf_ +@@ -6690,14 +6654,10 @@ static int check_helper_call(struct bpf_ tname, PTR_ERR(ret)); return -EINVAL; } @@ -567,7 +567,7 @@ Acked-by: Shung-Hsi Yu regs[BPF_REG_0].btf = meta.ret_btf; regs[BPF_REG_0].btf_id = meta.ret_btf_id; } -@@ -6388,9 +6348,7 @@ static int check_helper_call(struct bpf_ +@@ -6705,9 +6665,7 @@ static int check_helper_call(struct bpf_ int ret_btf_id; mark_reg_known_zero(env, regs, BPF_REG_0); @@ -578,7 +578,7 @@ Acked-by: Shung-Hsi Yu ret_btf_id = *fn->ret_btf_id; if (ret_btf_id == 0) { verbose(env, "invalid return type %u of func %s#%d\n", -@@ -6409,7 +6367,7 @@ static int check_helper_call(struct bpf_ +@@ -6726,7 +6684,7 @@ static int check_helper_call(struct bpf_ return -EINVAL; } @@ -587,7 +587,7 @@ Acked-by: Shung-Hsi Yu regs[BPF_REG_0].id = ++env->id_gen; if (is_ptr_cast_function(func_id)) { -@@ -6608,25 +6566,25 @@ static bool check_reg_sane_offset(struct +@@ -6935,25 +6893,25 @@ static bool check_reg_sane_offset(struct if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { verbose(env, "math between %s pointer and %lld is not allowed\n", @@ -617,7 +617,7 @@ Acked-by: Shung-Hsi Yu return false; } -@@ -7003,11 +6961,13 @@ static int adjust_ptr_min_max_vals(struc +@@ -7330,11 +7288,13 @@ static int adjust_ptr_min_max_vals(struc return -EACCES; } @@ -634,7 +634,7 @@ Acked-by: Shung-Hsi Yu case CONST_PTR_TO_MAP: /* smin_val represents the known value */ if (known && smin_val == 0 && opcode == BPF_ADD) -@@ -7015,14 +6975,11 @@ static int adjust_ptr_min_max_vals(struc +@@ -7342,14 +7302,11 @@ static int adjust_ptr_min_max_vals(struc fallthrough; case PTR_TO_PACKET_END: case PTR_TO_SOCKET: @@ -650,7 +650,7 @@ Acked-by: Shung-Hsi Yu return -EACCES; default: break; -@@ -8745,7 +8702,7 @@ static void mark_ptr_or_null_reg(struct +@@ -9072,7 +9029,7 @@ static void mark_ptr_or_null_reg(struct struct bpf_reg_state *reg, u32 id, bool is_null) { @@ -659,7 +659,7 @@ Acked-by: Shung-Hsi Yu !WARN_ON_ONCE(!reg->id)) { /* Old offset (both fixed and variable parts) should * have been known-zero, because we don't allow pointer -@@ -9123,7 +9080,7 @@ static int check_cond_jmp_op(struct bpf_ +@@ -9450,7 +9407,7 @@ static int 
check_cond_jmp_op(struct bpf_ */ if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && @@ -668,7 +668,7 @@ Acked-by: Shung-Hsi Yu /* Mark all identical registers in each branch as either * safe or unknown depending R == 0 or R != 0 conditional. */ -@@ -9377,7 +9334,7 @@ static int check_return_code(struct bpf_ +@@ -9704,7 +9661,7 @@ static int check_return_code(struct bpf_ /* enforce return zero from async callbacks like timer */ if (reg->type != SCALAR_VALUE) { verbose(env, "In async callback the register R0 is not a known value (%s)\n", @@ -677,7 +677,7 @@ Acked-by: Shung-Hsi Yu return -EINVAL; } -@@ -9391,7 +9348,7 @@ static int check_return_code(struct bpf_ +@@ -9718,7 +9675,7 @@ static int check_return_code(struct bpf_ if (is_subprog) { if (reg->type != SCALAR_VALUE) { verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n", @@ -686,7 +686,7 @@ Acked-by: Shung-Hsi Yu return -EINVAL; } return 0; -@@ -9455,7 +9412,7 @@ static int check_return_code(struct bpf_ +@@ -9782,7 +9739,7 @@ static int check_return_code(struct bpf_ if (reg->type != SCALAR_VALUE) { verbose(env, "At program exit the register R0 is not a known value (%s)\n", @@ -695,7 +695,7 @@ Acked-by: Shung-Hsi Yu return -EINVAL; } -@@ -10236,7 +10193,7 @@ static bool regsafe(struct bpf_verifier_ +@@ -10639,7 +10596,7 @@ static bool regsafe(struct bpf_verifier_ return true; if (rcur->type == NOT_INIT) return false; @@ -704,7 +704,7 @@ Acked-by: Shung-Hsi Yu case SCALAR_VALUE: if (env->explore_alu_limits) return false; -@@ -10258,6 +10215,22 @@ static bool regsafe(struct bpf_verifier_ +@@ -10661,6 +10618,22 @@ static bool regsafe(struct bpf_verifier_ } case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: @@ -727,7 +727,7 @@ Acked-by: Shung-Hsi Yu /* If the new min/max/var_off satisfy the old ones and * everything else matches, we are OK. * 'id' is not compared, since it's only used for maps with -@@ -10269,20 +10242,6 @@ static bool regsafe(struct bpf_verifier_ +@@ -10672,20 +10645,6 @@ static bool regsafe(struct bpf_verifier_ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off); @@ -748,7 +748,7 @@ Acked-by: Shung-Hsi Yu case PTR_TO_PACKET_META: case PTR_TO_PACKET: if (rcur->type != rold->type) -@@ -10311,11 +10270,8 @@ static bool regsafe(struct bpf_verifier_ +@@ -10714,11 +10673,8 @@ static bool regsafe(struct bpf_verifier_ case PTR_TO_PACKET_END: case PTR_TO_FLOW_KEYS: case PTR_TO_SOCKET: @@ -760,7 +760,7 @@ Acked-by: Shung-Hsi Yu case PTR_TO_XDP_SOCK: /* Only valid matches are exact, which memcmp() above * would have accepted -@@ -10841,17 +10797,13 @@ next: +@@ -11244,17 +11200,13 @@ next: /* Return true if it's OK to have the same insn return a different type. 
*/ static bool reg_type_mismatch_ok(enum bpf_reg_type type) { @@ -779,7 +779,7 @@ Acked-by: Shung-Hsi Yu return false; default: return true; -@@ -11075,7 +11027,7 @@ static int do_check(struct bpf_verifier_ +@@ -11480,7 +11432,7 @@ static int do_check(struct bpf_verifier_ if (is_ctx_reg(env, insn->dst_reg)) { verbose(env, "BPF_ST stores into R%d %s is not allowed\n", insn->dst_reg, @@ -801,7 +801,7 @@ Acked-by: Shung-Hsi Yu }; --- a/net/core/sock_map.c +++ b/net/core/sock_map.c -@@ -1564,7 +1564,7 @@ static struct bpf_iter_reg sock_map_iter +@@ -1563,7 +1563,7 @@ static struct bpf_iter_reg sock_map_iter .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct bpf_iter__sockmap, key), diff --git a/patches.suse/bpf-Right-align-verifier-states-in-verifier-logs.patch b/patches.suse/bpf-Right-align-verifier-states-in-verifier-logs.patch new file mode 100644 index 0000000..6ca41e7 --- /dev/null +++ b/patches.suse/bpf-Right-align-verifier-states-in-verifier-logs.patch @@ -0,0 +1,523 @@ +From: Christy Lee +Date: Thu, 16 Dec 2021 19:42:45 -0800 +Subject: bpf: Right align verifier states in verifier logs. +Patch-mainline: v5.17-rc1 +Git-commit: 2e5766483c8c5cf886b4dc647a1741738dde7d79 +References: jsc#PED-1368 + +Make the verifier logs more readable, print the verifier states +on the corresponding instruction line. If the previous line was +not a bpf instruction, then print the verifier states on its own +line. + +Before: + +Validating test_pkt_access_subprog3() func#3... +86: R1=invP(id=0) R2=ctx(id=0,off=0,imm=0) R10=fp0 +; int test_pkt_access_subprog3(int val, struct __sk_buff *skb) +86: (bf) r6 = r2 +87: R2=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) +87: (bc) w7 = w1 +88: R1=invP(id=0) R7_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) +; return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123)); +88: (bf) r1 = r6 +89: R1_w=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) +89: (85) call pc+9 +Func#4 is global and valid. Skipping. +90: R0_w=invP(id=0) +90: (bc) w8 = w0 +91: R0_w=invP(id=0) R8_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) +; return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123)); +91: (b7) r1 = 123 +92: R1_w=invP123 +92: (85) call pc+65 +Func#5 is global and valid. Skipping. +93: R0=invP(id=0) + +After: + +86: R1=invP(id=0) R2=ctx(id=0,off=0,imm=0) R10=fp0 +; int test_pkt_access_subprog3(int val, struct __sk_buff *skb) +86: (bf) r6 = r2 ; R2=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) +87: (bc) w7 = w1 ; R1=invP(id=0) R7_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) +; return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123)); +88: (bf) r1 = r6 ; R1_w=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) +89: (85) call pc+9 +Func#4 is global and valid. Skipping. +90: R0_w=invP(id=0) +90: (bc) w8 = w0 ; R0_w=invP(id=0) R8_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) +; return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123)); +91: (b7) r1 = 123 ; R1_w=invP123 +92: (85) call pc+65 +Func#5 is global and valid. Skipping. 
+93: R0=invP(id=0) + +Signed-off-by: Christy Lee +Acked-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- + include/linux/bpf_verifier.h | 3 + kernel/bpf/verifier.c | 57 +++++--- + tools/testing/selftests/bpf/prog_tests/align.c | 169 +++++++++++++------------ + 3 files changed, 131 insertions(+), 98 deletions(-) + +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -388,6 +388,8 @@ static inline bool bpf_verifier_log_full + #define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2) + #define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS) + #define BPF_LOG_KERNEL (BPF_LOG_MASK + 1) /* kernel internal flag */ ++#define BPF_LOG_MIN_ALIGNMENT 8U ++#define BPF_LOG_ALIGNMENT 40U + + static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) + { +@@ -481,6 +483,7 @@ struct bpf_verifier_env { + u32 scratched_regs; + /* Same as scratched_regs but for stack slots */ + u64 scratched_stack_slots; ++ u32 prev_log_len, prev_insn_print_len; + }; + + __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -797,6 +797,25 @@ static void print_verifier_state(struct + mark_verifier_state_clean(env); + } + ++static inline u32 vlog_alignment(u32 pos) ++{ ++ return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT), ++ BPF_LOG_MIN_ALIGNMENT) - pos - 1; ++} ++ ++static void print_insn_state(struct bpf_verifier_env *env, ++ const struct bpf_func_state *state) ++{ ++ if (env->prev_log_len && env->prev_log_len == env->log.len_used) { ++ /* remove new line character */ ++ bpf_vlog_reset(&env->log, env->prev_log_len - 1); ++ verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' '); ++ } else { ++ verbose(env, "%d:", env->insn_idx); ++ } ++ print_verifier_state(env, state, false); ++} ++ + /* copy array src of length n * size bytes to dst. dst is reallocated if it's too + * small to hold src. This is different from krealloc since we don't want to preserve + * the contents of dst. +@@ -2731,10 +2750,10 @@ static int __mark_chain_precision(struct + reg->precise = true; + } + if (env->log.level & BPF_LOG_LEVEL) { +- print_verifier_state(env, func, false); +- verbose(env, "parent %s regs=%x stack=%llx marks\n", ++ verbose(env, "parent %s regs=%x stack=%llx marks:", + new_marks ? "didn't have" : "already had", + reg_mask, stack_mask); ++ print_verifier_state(env, func, true); + } + + if (!reg_mask && !stack_mask) +@@ -3429,11 +3448,8 @@ static int check_mem_region_access(struc + /* We may have adjusted the register pointing to memory region, so we + * need to try adding each of min_value and max_value to off + * to make sure our theoretical access will be safe. +- */ +- if (env->log.level & BPF_LOG_LEVEL) +- print_verifier_state(env, state, false); +- +- /* The minimum value is only important with signed ++ * ++ * The minimum value is only important with signed + * comparisons where we can't assume the floor of a + * value is 0. 
If we are using signed variables for our + * index'es we need to make sure that whatever we use +@@ -9461,7 +9477,7 @@ static int check_cond_jmp_op(struct bpf_ + return -EACCES; + } + if (env->log.level & BPF_LOG_LEVEL) +- print_verifier_state(env, this_branch->frame[this_branch->curframe], false); ++ print_insn_state(env, this_branch->frame[this_branch->curframe]); + return 0; + } + +@@ -11329,19 +11345,12 @@ static int do_check(struct bpf_verifier_ + if (need_resched()) + cond_resched(); + +- if (env->log.level & BPF_LOG_LEVEL2 || +- (env->log.level & BPF_LOG_LEVEL && do_print_state)) { +- if (env->log.level & BPF_LOG_LEVEL2) { +- if (verifier_state_scratched(env)) +- verbose(env, "%d:", env->insn_idx); +- } else { +- verbose(env, "\nfrom %d to %d%s:", +- env->prev_insn_idx, env->insn_idx, +- env->cur_state->speculative ? +- " (speculative execution)" : ""); +- } +- print_verifier_state(env, state->frame[state->curframe], +- false); ++ if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) { ++ verbose(env, "\nfrom %d to %d%s:", ++ env->prev_insn_idx, env->insn_idx, ++ env->cur_state->speculative ? ++ " (speculative execution)" : ""); ++ print_verifier_state(env, state->frame[state->curframe], true); + do_print_state = false; + } + +@@ -11352,9 +11361,15 @@ static int do_check(struct bpf_verifier_ + .private_data = env, + }; + ++ if (verifier_state_scratched(env)) ++ print_insn_state(env, state->frame[state->curframe]); ++ + verbose_linfo(env, env->insn_idx, "; "); ++ env->prev_log_len = env->log.len_used; + verbose(env, "%d: ", env->insn_idx); + print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); ++ env->prev_insn_print_len = env->log.len_used - env->prev_log_len; ++ env->prev_log_len = env->log.len_used; + } + + if (bpf_prog_is_dev_bound(env->prog->aux)) { +--- a/tools/testing/selftests/bpf/prog_tests/align.c ++++ b/tools/testing/selftests/bpf/prog_tests/align.c +@@ -41,11 +41,11 @@ static struct bpf_align_test tests[] = { + .matches = { + {0, "R1=ctx(id=0,off=0,imm=0)"}, + {0, "R10=fp0"}, +- {1, "R3_w=inv2"}, +- {2, "R3_w=inv4"}, +- {3, "R3_w=inv8"}, +- {4, "R3_w=inv16"}, +- {5, "R3_w=inv32"}, ++ {0, "R3_w=inv2"}, ++ {1, "R3_w=inv4"}, ++ {2, "R3_w=inv8"}, ++ {3, "R3_w=inv16"}, ++ {4, "R3_w=inv32"}, + }, + }, + { +@@ -69,17 +69,17 @@ static struct bpf_align_test tests[] = { + .matches = { + {0, "R1=ctx(id=0,off=0,imm=0)"}, + {0, "R10=fp0"}, +- {1, "R3_w=inv1"}, +- {2, "R3_w=inv2"}, +- {3, "R3_w=inv4"}, +- {4, "R3_w=inv8"}, +- {5, "R3_w=inv16"}, +- {6, "R3_w=inv1"}, +- {7, "R4_w=inv32"}, +- {8, "R4_w=inv16"}, +- {9, "R4_w=inv8"}, +- {10, "R4_w=inv4"}, +- {11, "R4_w=inv2"}, ++ {0, "R3_w=inv1"}, ++ {1, "R3_w=inv2"}, ++ {2, "R3_w=inv4"}, ++ {3, "R3_w=inv8"}, ++ {4, "R3_w=inv16"}, ++ {5, "R3_w=inv1"}, ++ {6, "R4_w=inv32"}, ++ {7, "R4_w=inv16"}, ++ {8, "R4_w=inv8"}, ++ {9, "R4_w=inv4"}, ++ {10, "R4_w=inv2"}, + }, + }, + { +@@ -98,12 +98,12 @@ static struct bpf_align_test tests[] = { + .matches = { + {0, "R1=ctx(id=0,off=0,imm=0)"}, + {0, "R10=fp0"}, +- {1, "R3_w=inv4"}, +- {2, "R3_w=inv8"}, +- {3, "R3_w=inv10"}, +- {4, "R4_w=inv8"}, +- {5, "R4_w=inv12"}, +- {6, "R4_w=inv14"}, ++ {0, "R3_w=inv4"}, ++ {1, "R3_w=inv8"}, ++ {2, "R3_w=inv10"}, ++ {3, "R4_w=inv8"}, ++ {4, "R4_w=inv12"}, ++ {5, "R4_w=inv14"}, + }, + }, + { +@@ -120,10 +120,10 @@ static struct bpf_align_test tests[] = { + .matches = { + {0, "R1=ctx(id=0,off=0,imm=0)"}, + {0, "R10=fp0"}, ++ {0, "R3_w=inv7"}, + {1, "R3_w=inv7"}, +- {2, "R3_w=inv7"}, +- {3, "R3_w=inv14"}, +- {4, "R3_w=inv56"}, ++ {2, "R3_w=inv14"}, ++ {3, "R3_w=inv56"}, 
+ }, + }, + +@@ -162,18 +162,18 @@ static struct bpf_align_test tests[] = { + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + {6, "R0_w=pkt(id=0,off=8,r=8,imm=0)"}, +- {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, +- {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, +- {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, +- {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, +- {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, +- {13, "R3_w=pkt_end(id=0,off=0,imm=0)"}, +- {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, +- {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, +- {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, +- {21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, +- {22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, +- {23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, ++ {6, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, ++ {7, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, ++ {8, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, ++ {9, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, ++ {10, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, ++ {12, "R3_w=pkt_end(id=0,off=0,imm=0)"}, ++ {17, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, ++ {18, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, ++ {19, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, ++ {20, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, ++ {21, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, ++ {22, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + }, + }, + { +@@ -194,16 +194,16 @@ static struct bpf_align_test tests[] = { + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { +- {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, +- {8, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, +- {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, +- {10, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, +- {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, +- {12, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, +- {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, +- {14, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, +- {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, +- {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, ++ {6, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, ++ {7, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, ++ {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, ++ {9, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, ++ {10, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, ++ {11, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, ++ {12, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, ++ {13, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, ++ {14, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, ++ {15, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + }, + }, + { +@@ -234,14 +234,14 @@ static struct bpf_align_test tests[] = { + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { +- {3, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, +- {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"}, +- {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"}, ++ {2, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, ++ {4, "R5_w=pkt(id=0,off=14,r=0,imm=0)"}, ++ {5, "R4_w=pkt(id=0,off=14,r=0,imm=0)"}, + {9, "R2=pkt(id=0,off=0,r=18,imm=0)"}, + {10, "R5=pkt(id=0,off=14,r=18,imm=0)"}, + {10, 
"R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, ++ {13, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + {14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, +- {15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + }, + }, + { +@@ -297,7 +297,7 @@ static struct bpf_align_test tests[] = { + * alignment of 4. + */ + {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, +- {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, ++ {7, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* Offset is added to packet pointer R5, resulting in + * known fixed offset, and variable offset from R6. + */ +@@ -313,11 +313,11 @@ static struct bpf_align_test tests[] = { + /* Variable offset is added to R5 packet pointer, + * resulting in auxiliary alignment of 4. + */ +- {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, ++ {17, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* Constant offset is added to R5, resulting in + * reg->off of 14. + */ +- {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, ++ {18, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* At the time the word size load is performed from R5, + * its total fixed offset is NET_IP_ALIGN + reg->off + * (14) which is 16. Then the variable offset is 4-byte +@@ -329,18 +329,18 @@ static struct bpf_align_test tests[] = { + /* Constant offset is added to R5 packet pointer, + * resulting in reg->off value of 14. + */ +- {26, "R5_w=pkt(id=0,off=14,r=8"}, ++ {25, "R5_w=pkt(id=0,off=14,r=8"}, + /* Variable offset is added to R5, resulting in a + * variable offset of (4n). + */ +- {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, ++ {26, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* Constant is added to R5 again, setting reg->off to 18. */ +- {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, ++ {27, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* And once more we add a variable; resulting var_off + * is still (4n), fixed offset is not changed. + * Also, we create a new reg->id. + */ +- {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"}, ++ {28, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"}, + /* At the time the word size load is performed from R5, + * its total fixed offset is NET_IP_ALIGN + reg->off (18) + * which is 20. Then the variable offset is (4n), so +@@ -387,12 +387,12 @@ static struct bpf_align_test tests[] = { + * alignment of 4. + */ + {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, +- {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, ++ {7, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* Adding 14 makes R6 be (4n+2) */ +- {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, ++ {8, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + /* Packet pointer has (4n+2) offset */ + {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"}, +- {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"}, ++ {12, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"}, + /* At the time the word size load is performed from R5, + * its total fixed offset is NET_IP_ALIGN + reg->off (0) + * which is 2. Then the variable offset is (4n+2), so +@@ -403,12 +403,12 @@ static struct bpf_align_test tests[] = { + /* Newly read value in R6 was shifted left by 2, so has + * known alignment of 4. 
+ */ +- {18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, ++ {17, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* Added (4n) to packet pointer's (4n+2) var_off, giving + * another (4n+2). + */ + {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"}, +- {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"}, ++ {20, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"}, + /* At the time the word size load is performed from R5, + * its total fixed offset is NET_IP_ALIGN + reg->off (0) + * which is 2. Then the variable offset is (4n+2), so +@@ -448,18 +448,18 @@ static struct bpf_align_test tests[] = { + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .matches = { +- {4, "R5_w=pkt_end(id=0,off=0,imm=0)"}, ++ {3, "R5_w=pkt_end(id=0,off=0,imm=0)"}, + /* (ptr - ptr) << 2 == unknown, (4n) */ +- {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"}, ++ {5, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"}, + /* (4n) + 14 == (4n+2). We blow our bounds, because + * the add could overflow. + */ +- {7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, ++ {6, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, + /* Checked s>=0 */ + {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + /* packet pointer + nonnegative (4n+2) */ +- {12, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, +- {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, ++ {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, ++ {12, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. + * We checked the bounds, but it might have been able + * to overflow if the packet pointer started in the +@@ -502,14 +502,14 @@ static struct bpf_align_test tests[] = { + /* Calculated offset in R6 has unknown value, but known + * alignment of 4. 
+ */ +- {7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, +- {9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, ++ {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, ++ {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* Adding 14 makes R6 be (4n+2) */ +- {10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, ++ {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + /* New unknown value in R7 is (4n) */ +- {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, ++ {10, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* Subtracting it from R6 blows our unsigned bounds */ +- {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, ++ {11, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, + /* Checked s>= 0 */ + {14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"}, + /* At the time the word size load is performed from R5, +@@ -556,14 +556,14 @@ static struct bpf_align_test tests[] = { + /* Calculated offset in R6 has unknown value, but known + * alignment of 4. + */ +- {7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, +- {10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, ++ {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, ++ {9, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, + /* Adding 14 makes R6 be (4n+2) */ +- {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, ++ {10, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, + /* Subtracting from packet pointer overflows ubounds */ + {13, "R5_w=pkt(id=2,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, + /* New unknown value in R7 is (4n), >= 76 */ +- {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, ++ {14, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, + /* Adding it to packet pointer gives nice bounds again */ + {16, "R5_w=pkt(id=3,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, + /* At the time the word size load is performed from R5, +@@ -625,12 +625,15 @@ static int do_test_single(struct bpf_ali + line_ptr = strtok(bpf_vlog_copy, "\n"); + for (i = 0; i < MAX_MATCHES; i++) { + struct bpf_reg_match m = test->matches[i]; ++ int tmp; + + if (!m.match) + break; + while (line_ptr) { + cur_line = -1; + sscanf(line_ptr, "%u: ", &cur_line); ++ if (cur_line == -1) ++ sscanf(line_ptr, "from %u to %u: ", &tmp, &cur_line); + if (cur_line == m.line) + break; + line_ptr = strtok(NULL, "\n"); +@@ -642,7 +645,19 @@ static int do_test_single(struct bpf_ali + printf("%s", bpf_vlog); + break; + } ++ /* Check the next line as well in case the previous line ++ * did not have a corresponding bpf insn. 
Example: ++ * func#0 @0 ++ * 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 ++ * 0: (b7) r3 = 2 ; R3_w=inv2 ++ */ + if (!strstr(line_ptr, m.match)) { ++ cur_line = -1; ++ line_ptr = strtok(NULL, "\n"); ++ sscanf(line_ptr, "%u: ", &cur_line); ++ } ++ if (cur_line != m.line || !line_ptr || ++ !strstr(line_ptr, m.match)) { + printf("Failed to find match %u: %s\n", + m.line, m.match); + ret = 1; diff --git a/patches.suse/bpf-Silence-coverity-false-positive-warning.patch b/patches.suse/bpf-Silence-coverity-false-positive-warning.patch new file mode 100644 index 0000000..52b167e --- /dev/null +++ b/patches.suse/bpf-Silence-coverity-false-positive-warning.patch @@ -0,0 +1,60 @@ +From: Alexei Starovoitov +Date: Sat, 11 Dec 2021 18:08:19 -0800 +Subject: bpf: Silence coverity false positive warning. +Patch-mainline: v5.17-rc1 +Git-commit: f18a499799dd0f0fdd98cf72d98d3866ce9ac60e +References: jsc#PED-1368 + +Coverity issued the following warning: +6685 cands = bpf_core_add_cands(cands, main_btf, 1); +6686 if (IS_ERR(cands)) +>>> CID 1510300: (RETURN_LOCAL) +>>> Returning pointer "cands" which points to local variable "local_cand". +6687 return cands; + +It's a false positive. +Add ERR_CAST() to silence it. + +Signed-off-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/btf.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -6656,7 +6656,7 @@ bpf_core_find_cands(struct bpf_core_ctx + + main_btf = bpf_get_btf_vmlinux(); + if (IS_ERR(main_btf)) +- return (void *)main_btf; ++ return ERR_CAST(main_btf); + + local_type = btf_type_by_id(local_btf, local_type_id); + if (!local_type) +@@ -6683,14 +6683,14 @@ bpf_core_find_cands(struct bpf_core_ctx + /* Attempt to find target candidates in vmlinux BTF first */ + cands = bpf_core_add_cands(cands, main_btf, 1); + if (IS_ERR(cands)) +- return cands; ++ return ERR_CAST(cands); + + /* cands is a pointer to kmalloced memory here if cands->cnt > 0 */ + + /* populate cache even when cands->cnt == 0 */ + cc = populate_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); + if (IS_ERR(cc)) +- return cc; ++ return ERR_CAST(cc); + + /* if vmlinux BTF has any candidate, don't go for module BTFs */ + if (cc->cnt) +@@ -6716,7 +6716,7 @@ check_modules: + cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf)); + if (IS_ERR(cands)) { + btf_put(mod_btf); +- return cands; ++ return ERR_CAST(cands); + } + spin_lock_bh(&btf_idr_lock); + btf_put(mod_btf); diff --git a/patches.suse/bpf-Silence-purge_cand_cache-build-warning.patch b/patches.suse/bpf-Silence-purge_cand_cache-build-warning.patch new file mode 100644 index 0000000..51d3738 --- /dev/null +++ b/patches.suse/bpf-Silence-purge_cand_cache-build-warning.patch @@ -0,0 +1,39 @@ +From: Alexei Starovoitov +Date: Mon, 6 Dec 2021 17:48:39 -0800 +Subject: bpf: Silence purge_cand_cache build warning. +Patch-mainline: v5.17-rc1 +Git-commit: 29f2e5bd9439445fe14ba8570b1c9a7ad682df84 +References: jsc#PED-1368 + +When CONFIG_DEBUG_INFO_BTF_MODULES is not set +the following warning can be seen: +kernel/bpf/btf.c:6588:13: warning: 'purge_cand_cache' defined but not used [-Wunused-function] +Fix it. 
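Both fixes above are warning-hygiene changes; the Coverity one leans on the kernel's error-pointer idiom, which is worth a minimal sketch. The foo/bar types and lookup_bar() below are hypothetical, not taken from the patch series:

        #include <linux/err.h>

        struct foo;
        struct bar *lookup_bar(void);

        static struct foo *get_foo(void)
        {
                struct bar *b = lookup_bar();

                /* IS_ERR() tests for an errno encoded in the pointer
                 * value; ERR_CAST() hands that errno on under the
                 * caller's return type, making the re-typing explicit
                 * to static analyzers instead of an open (void *) cast.
                 */
                if (IS_ERR(b))
                        return ERR_CAST(b);
                /* ... real lookup path elided ... */
                return NULL;
        }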
+
+Fixes: 1e89106da253 ("bpf: Add bpf_core_add_cands() and wire it into bpf_core_apply_relo_insn().")
+Signed-off-by: Alexei Starovoitov
+Signed-off-by: Andrii Nakryiko
+Link: https://lore.kernel.org/bpf/20211207014839.6976-1-alexei.starovoitov@gmail.com
+Acked-by: Shung-Hsi Yu
+---
+ kernel/bpf/btf.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/bpf/btf.c
++++ b/kernel/bpf/btf.c
+@@ -6553,6 +6553,7 @@ static struct bpf_cand_cache *populate_c
+ 	return new_cands;
+ }
+
++#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+ static void __purge_cand_cache(struct btf *btf, struct bpf_cand_cache **cache,
+ 			       int cache_size)
+ {
+@@ -6591,6 +6592,7 @@ static void purge_cand_cache(struct btf
+ 	__purge_cand_cache(btf, module_cand_cache, MODULE_CAND_CACHE_SIZE);
+ 	mutex_unlock(&cand_cache_mutex);
+ }
++#endif
+
+ static struct bpf_cand_cache *
+ bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf,
diff --git a/patches.suse/bpf-Support-BTF_KIND_TYPE_TAG-for-btf_type_tag-attri.patch b/patches.suse/bpf-Support-BTF_KIND_TYPE_TAG-for-btf_type_tag-attri.patch
new file mode 100644
index 0000000..f93691e
--- /dev/null
+++ b/patches.suse/bpf-Support-BTF_KIND_TYPE_TAG-for-btf_type_tag-attri.patch
@@ -0,0 +1,141 @@
+From: Yonghong Song
+Date: Thu, 11 Nov 2021 17:26:09 -0800
+Subject: bpf: Support BTF_KIND_TYPE_TAG for btf_type_tag attributes
+Patch-mainline: v5.17-rc1
+Git-commit: 8c42d2fa4eeab6c37a0b1b1aa7a2715248ef4f34
+References: jsc#PED-1368
+
+LLVM patches ([1] for clang, [2] and [3] for BPF backend)
+added support for btf_type_tag attributes. This patch
+adds the corresponding support to the kernel.
+
+The main motivation for btf_type_tag is to bring kernel
+annotations __user, __rcu etc. to btf. With such information
+available in btf, the bpf verifier can detect misuse
+and reject the program. For example, for a __user tagged pointer,
+developers can then use a proper helper like bpf_probe_read_user()
+to read the data.
+
+BTF_KIND_TYPE_TAG may also be useful for other tracing
+facilities: instead of requiring the user to specify the
+kernel/user address type, the kernel can detect it
+by itself with btf.
+
+ [1] https://reviews.llvm.org/D111199
+ [2] https://reviews.llvm.org/D113222
+ [3] https://reviews.llvm.org/D113496
+
+Signed-off-by: Yonghong Song
+Signed-off-by: Alexei Starovoitov
+Acked-by: Andrii Nakryiko
+Link: https://lore.kernel.org/bpf/20211112012609.1505032-1-yhs@fb.com
+Acked-by: Shung-Hsi Yu
+---
+ include/uapi/linux/btf.h | 3 ++-
+ kernel/bpf/btf.c | 14 +++++++++++++-
+ tools/include/uapi/linux/btf.h | 3 ++-
+ 3 files changed, 17 insertions(+), 3 deletions(-)
+
+--- a/include/uapi/linux/btf.h
++++ b/include/uapi/linux/btf.h
+@@ -43,7 +43,7 @@ struct btf_type {
+ 	 * "size" tells the size of the type it is describing.
+ 	 *
+ 	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+-	 * FUNC, FUNC_PROTO, VAR and DECL_TAG.
++	 * FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG.
+ 	 * "type" is a type_id referring to another type.
+ */ + union { +@@ -75,6 +75,7 @@ enum { + BTF_KIND_DATASEC = 15, /* Section */ + BTF_KIND_FLOAT = 16, /* Floating point */ + BTF_KIND_DECL_TAG = 17, /* Decl Tag */ ++ BTF_KIND_TYPE_TAG = 18, /* Type Tag */ + + NR_BTF_KINDS, + BTF_KIND_MAX = NR_BTF_KINDS - 1, +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -282,6 +282,7 @@ static const char * const btf_kind_str[N + [BTF_KIND_DATASEC] = "DATASEC", + [BTF_KIND_FLOAT] = "FLOAT", + [BTF_KIND_DECL_TAG] = "DECL_TAG", ++ [BTF_KIND_TYPE_TAG] = "TYPE_TAG", + }; + + const char *btf_type_str(const struct btf_type *t) +@@ -418,6 +419,7 @@ static bool btf_type_is_modifier(const s + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: ++ case BTF_KIND_TYPE_TAG: + return true; + } + +@@ -1737,6 +1739,7 @@ __btf_resolve_size(const struct btf *btf + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: ++ case BTF_KIND_TYPE_TAG: + id = type->type; + type = btf_type_by_id(btf, type->type); + break; +@@ -2345,6 +2348,8 @@ static int btf_ref_type_check_meta(struc + const struct btf_type *t, + u32 meta_left) + { ++ const char *value; ++ + if (btf_type_vlen(t)) { + btf_verifier_log_type(env, t, "vlen != 0"); + return -EINVAL; +@@ -2360,7 +2365,7 @@ static int btf_ref_type_check_meta(struc + return -EINVAL; + } + +- /* typedef type must have a valid name, and other ref types, ++ /* typedef/type_tag type must have a valid name, and other ref types, + * volatile, const, restrict, should have a null name. + */ + if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF) { +@@ -2369,6 +2374,12 @@ static int btf_ref_type_check_meta(struc + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } ++ } else if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG) { ++ value = btf_name_by_offset(env->btf, t->name_off); ++ if (!value || !value[0]) { ++ btf_verifier_log_type(env, t, "Invalid name"); ++ return -EINVAL; ++ } + } else { + if (t->name_off) { + btf_verifier_log_type(env, t, "Invalid name"); +@@ -4059,6 +4070,7 @@ static const struct btf_kind_operations + [BTF_KIND_DATASEC] = &datasec_ops, + [BTF_KIND_FLOAT] = &float_ops, + [BTF_KIND_DECL_TAG] = &decl_tag_ops, ++ [BTF_KIND_TYPE_TAG] = &modifier_ops, + }; + + static s32 btf_check_meta(struct btf_verifier_env *env, +--- a/tools/include/uapi/linux/btf.h ++++ b/tools/include/uapi/linux/btf.h +@@ -43,7 +43,7 @@ struct btf_type { + * "size" tells the size of the type it is describing. + * + * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, +- * FUNC, FUNC_PROTO, VAR and DECL_TAG. ++ * FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG. + * "type" is a type_id referring to another type. 
+	 */
+	union {
+@@ -75,6 +75,7 @@ enum {
+ 	BTF_KIND_DATASEC = 15, /* Section */
+ 	BTF_KIND_FLOAT = 16, /* Floating point */
+ 	BTF_KIND_DECL_TAG = 17, /* Decl Tag */
++	BTF_KIND_TYPE_TAG = 18, /* Type Tag */
+
+ 	NR_BTF_KINDS,
+ 	BTF_KIND_MAX = NR_BTF_KINDS - 1,
diff --git a/patches.suse/bpf-Use-VM_MAP-instead-of-VM_ALLOC-for-ringbuf.patch b/patches.suse/bpf-Use-VM_MAP-instead-of-VM_ALLOC-for-ringbuf.patch
new file mode 100644
index 0000000..8d867fb
--- /dev/null
+++ b/patches.suse/bpf-Use-VM_MAP-instead-of-VM_ALLOC-for-ringbuf.patch
@@ -0,0 +1,40 @@
+From: Hou Tao
+Date: Wed, 2 Feb 2022 14:01:58 +0800
+Subject: bpf: Use VM_MAP instead of VM_ALLOC for ringbuf
+Patch-mainline: v5.17-rc3
+Git-commit: b293dcc473d22a62dc6d78de2b15e4f49515db56
+References: jsc#PED-1368
+
+After commit 2fd3fb0be1d1 ("kasan, vmalloc: unpoison VM_ALLOC pages
+after mapping"), non-VM_ALLOC mappings will be marked as accessible
+in __get_vm_area_node() when KASAN is enabled. But now the flag for
+the ringbuf area is VM_ALLOC, so KASAN will complain about an
+out-of-bound access after vmap() returns. Because the ringbuf area
+is created by mapping allocated pages, use VM_MAP instead.
+
+After the change, info in /proc/vmallocinfo also changes from
+ [start]-[end] 24576 ringbuf_map_alloc+0x171/0x290 vmalloc user
+to
+ [start]-[end] 24576 ringbuf_map_alloc+0x171/0x290 vmap user
+
+Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it")
+Reported-by: syzbot+5ad567a418794b9b5983@syzkaller.appspotmail.com
+Signed-off-by: Hou Tao
+Signed-off-by: Andrii Nakryiko
+Link: https://lore.kernel.org/bpf/20220202060158.6260-1-houtao1@huawei.com
+Acked-by: Shung-Hsi Yu
+---
+ kernel/bpf/ringbuf.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/bpf/ringbuf.c
++++ b/kernel/bpf/ringbuf.c
+@@ -104,7 +104,7 @@ static struct bpf_ringbuf *bpf_ringbuf_a
+ 	}
+
+ 	rb = vmap(pages, nr_meta_pages + 2 * nr_data_pages,
+-		  VM_ALLOC | VM_USERMAP, PAGE_KERNEL);
++		  VM_MAP | VM_USERMAP, PAGE_KERNEL);
+ 	if (rb) {
+ 		kmemleak_not_leak(pages);
+ 		rb->pages = pages;
diff --git a/patches.suse/bpf-Use-kmemdup-to-replace-kmalloc-memcpy.patch b/patches.suse/bpf-Use-kmemdup-to-replace-kmalloc-memcpy.patch
new file mode 100644
index 0000000..38c7928
--- /dev/null
+++ b/patches.suse/bpf-Use-kmemdup-to-replace-kmalloc-memcpy.patch
@@ -0,0 +1,36 @@
+From: Jiapeng Chong
+Date: Thu, 9 Dec 2021 14:21:22 +0800
+Subject: bpf: Use kmemdup() to replace kmalloc + memcpy
+Patch-mainline: v5.17-rc1
+Git-commit: 4674f21071b935c237217ac02cb310522d6ad95d
+References: jsc#PED-1368
+
+Eliminate the following coccicheck warning:
+
+./kernel/bpf/btf.c:6537:13-20: WARNING opportunity for kmemdup.
+
+Reported-by: Abaci Robot
+Signed-off-by: Jiapeng Chong
+Signed-off-by: Alexei Starovoitov
+Link: https://lore.kernel.org/bpf/1639030882-92383-1-git-send-email-jiapeng.chong@linux.alibaba.com
+Acked-by: Shung-Hsi Yu
+---
+ kernel/bpf/btf.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/kernel/bpf/btf.c
++++ b/kernel/bpf/btf.c
+@@ -6534,12 +6534,11 @@ static struct bpf_cand_cache *populate_c
+ 		bpf_free_cands_from_cache(*cc);
+ 		*cc = NULL;
+ 	}
+-	new_cands = kmalloc(sizeof_cands(cands->cnt), GFP_KERNEL);
++	new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL);
+ 	if (!new_cands) {
+ 		bpf_free_cands(cands);
+ 		return ERR_PTR(-ENOMEM);
+ 	}
+-	memcpy(new_cands, cands, sizeof_cands(cands->cnt));
+ 	/* strdup the name, since it will stay in cache.
+ 	 * the cands->name points to strings in prog's BTF and the prog can be unloaded.
+	 */
diff --git a/patches.suse/bpf-Use-struct_size-helper.patch b/patches.suse/bpf-Use-struct_size-helper.patch
new file mode 100644
index 0000000..4a7f149
--- /dev/null
+++ b/patches.suse/bpf-Use-struct_size-helper.patch
@@ -0,0 +1,53 @@
+From: Xiu Jianfeng
+Date: Mon, 20 Dec 2021 19:30:48 +0800
+Subject: bpf: Use struct_size() helper
+Patch-mainline: v5.17-rc1
+Git-commit: 0dd668d2080c46cf914e131f341fa114a34c5a20
+References: jsc#PED-1368
+
+In an effort to avoid open-coded arithmetic in the kernel, use the
+struct_size() helper instead of an open-coded calculation.
+
+Signed-off-by: Xiu Jianfeng
+Signed-off-by: Andrii Nakryiko
+Acked-by: Yonghong Song
+Link: https://github.com/KSPP/linux/issues/160
+Link: https://lore.kernel.org/bpf/20211220113048.2859-1-xiujianfeng@huawei.com
+Acked-by: Shung-Hsi Yu
+---
+ kernel/bpf/local_storage.c | 3 +--
+ kernel/bpf/reuseport_array.c | 6 +-----
+ 2 files changed, 2 insertions(+), 7 deletions(-)
+
+--- a/kernel/bpf/local_storage.c
++++ b/kernel/bpf/local_storage.c
+@@ -163,8 +163,7 @@ static int cgroup_storage_update_elem(st
+ 		return 0;
+ 	}
+
+-	new = bpf_map_kmalloc_node(map, sizeof(struct bpf_storage_buffer) +
+-				   map->value_size,
++	new = bpf_map_kmalloc_node(map, struct_size(new, data, map->value_size),
+ 				   __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
+ 				   map->numa_node);
+ 	if (!new)
+--- a/kernel/bpf/reuseport_array.c
++++ b/kernel/bpf/reuseport_array.c
+@@ -152,16 +152,12 @@ static struct bpf_map *reuseport_array_a
+ {
+ 	int numa_node = bpf_map_attr_numa_node(attr);
+ 	struct reuseport_array *array;
+-	u64 array_size;
+
+ 	if (!bpf_capable())
+ 		return ERR_PTR(-EPERM);
+
+-	array_size = sizeof(*array);
+-	array_size += (u64)attr->max_entries * sizeof(struct sock *);
+-
+ 	/* allocate all map elements and zero-initialize them */
+-	array = bpf_map_area_alloc(array_size, numa_node);
++	array = bpf_map_area_alloc(struct_size(array, ptrs, attr->max_entries), numa_node);
+ 	if (!array)
+ 		return ERR_PTR(-ENOMEM);
+
diff --git a/patches.suse/bpf-arm64-Use-emit_addr_mov_i64-for-BPF_PSEUDO_FUNC.patch b/patches.suse/bpf-arm64-Use-emit_addr_mov_i64-for-BPF_PSEUDO_FUNC.patch
new file mode 100644
index 0000000..3c70dd9
--- /dev/null
+++ b/patches.suse/bpf-arm64-Use-emit_addr_mov_i64-for-BPF_PSEUDO_FUNC.patch
@@ -0,0 +1,52 @@
+From: Hou Tao
+Date: Fri, 31 Dec 2021 23:10:18 +0800
+Subject: bpf, arm64: Use emit_addr_mov_i64() for BPF_PSEUDO_FUNC
+Patch-mainline: v5.17-rc1
+Git-commit: e4a41c2c1fa916547e63440c73a51a5eb06247af
+References: jsc#PED-1368
+
+The following error is reported when running "./test_progs -t for_each"
+under arm64:
+
+  bpf_jit: multi-func JIT bug 58 != 56
+  [...]
+  JIT doesn't support bpf-to-bpf calls
+
+The root cause is that the size of the BPF_PSEUDO_FUNC instruction
+increases from 2 to 3 machine instructions once the address of the
+called bpf function is settled, and there are two bpf-to-bpf calls
+in test_pkt_access. The generated instructions are shown below:
+
+  0x48: 21 00 C0 D2 movz x1, #0x1, lsl #32
+  0x4c: 21 00 80 F2 movk x1, #0x1
+
+  0x48: E1 3F C0 92 movn x1, #0x1ff, lsl #32
+  0x4c: 41 FE A2 F2 movk x1, #0x17f2, lsl #16
+  0x50: 81 70 9F F2 movk x1, #0xfb84
+
+Fix it by using emit_addr_mov_i64() for BPF_PSEUDO_FUNC, so that the
+size of the jited image will not change.
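To see why the emitted length is value-dependent in the first place, a simplified sketch of the counting logic follows; it ignores the movn path for mostly-ones values that the real JIT also considers:

        /* One movz/movk per non-zero 16-bit chunk of the immediate: an
         * address that is only known after relocation can therefore
         * need a different number of instructions than the placeholder
         * did, which is what a fixed-length emit_addr_mov_i64()
         * sequence avoids. */
        static int insns_for_imm64(unsigned long long imm)
        {
                int shift, count = 0;

                for (shift = 0; shift < 64; shift += 16)
                        if ((imm >> shift) & 0xffff)
                                count++;
                return count ? count : 1; /* value 0 still needs one movz */
        }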
+
+Fixes: 69c087ba6225 ("bpf: Add bpf_for_each_map_elem() helper")
+Signed-off-by: Hou Tao
+Signed-off-by: Daniel Borkmann
+Link: https://lore.kernel.org/bpf/20211231151018.3781550-1-houtao1@huawei.com
+Acked-by: Shung-Hsi Yu
+---
+ arch/arm64/net/bpf_jit_comp.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/arch/arm64/net/bpf_jit_comp.c
++++ b/arch/arm64/net/bpf_jit_comp.c
+@@ -789,7 +789,10 @@ emit_cond_jmp:
+ 		u64 imm64;
+
+ 		imm64 = (u64)insn1.imm << 32 | (u32)imm;
+-		emit_a64_mov_i64(dst, imm64, ctx);
++		if (bpf_pseudo_func(insn))
++			emit_addr_mov_i64(dst, imm64, ctx);
++		else
++			emit_a64_mov_i64(dst, imm64, ctx);
+
+ 		return 1;
+ 	}
diff --git a/patches.suse/bpf-docs-Add-a-setion-to-explain-the-basic-instructi.patch b/patches.suse/bpf-docs-Add-a-setion-to-explain-the-basic-instructi.patch
new file mode 100644
index 0000000..7e3ddce
--- /dev/null
+++ b/patches.suse/bpf-docs-Add-a-setion-to-explain-the-basic-instructi.patch
@@ -0,0 +1,44 @@
+From: Christoph Hellwig
+Date: Mon, 3 Jan 2022 19:35:51 +0100
+Subject: bpf, docs: Add a setion to explain the basic instruction encoding
+Patch-mainline: v5.17-rc1
+Git-commit: 62e4683849b6516c71e91f36e4fc0393a5883cfb
+References: jsc#PED-1368
+
+The eBPF instruction set document does not currently document the basic
+instruction encoding. Add a section to do that.
+
+Signed-off-by: Christoph Hellwig
+Signed-off-by: Alexei Starovoitov
+Link: https://lore.kernel.org/bpf/20220103183556.41040-2-hch@lst.de
+Acked-by: Shung-Hsi Yu
+---
+ Documentation/bpf/instruction-set.rst | 16 +++++++++++++++-
+ 1 file changed, 15 insertions(+), 1 deletion(-)
+
+--- a/Documentation/bpf/instruction-set.rst
++++ b/Documentation/bpf/instruction-set.rst
+@@ -19,8 +19,22 @@ The eBPF calling convention is defined a
+ R0 - R5 are scratch registers and eBPF programs needs to spill/fill them if
+ necessary across calls.
+
++Instruction encoding
++====================
++
++eBPF uses 64-bit instructions with the following encoding:
++
++ ============= ======= =============== ==================== ============
++ 32 bits (MSB) 16 bits 4 bits          4 bits               8 bits (LSB)
++ ============= ======= =============== ==================== ============
++ immediate     offset  source register destination register opcode
++ ============= ======= =============== ==================== ============
++
++Note that most instructions do not use all of the fields.
++Unused fields shall be cleared to zero.
++
+ Instruction classes
+-===================
++-------------------
+
+ The three LSB bits of the 'opcode' field store the instruction class:
+
diff --git a/patches.suse/bpf-docs-Add-subsections-for-ALU-and-JMP-instruction.patch b/patches.suse/bpf-docs-Add-subsections-for-ALU-and-JMP-instruction.patch
new file mode 100644
index 0000000..66dd758
--- /dev/null
+++ b/patches.suse/bpf-docs-Add-subsections-for-ALU-and-JMP-instruction.patch
@@ -0,0 +1,93 @@
+From: Christoph Hellwig
+Date: Mon, 3 Jan 2022 19:35:52 +0100
+Subject: bpf, docs: Add subsections for ALU and JMP instructions
+Patch-mainline: v5.17-rc1
+Git-commit: be3193cded9d5c030be1713bf52d307427e88d19
+References: jsc#PED-1368
+
+Add a little more structure to the ALU/JMP documentation with sections and
+improve the example text.
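The basic encoding documented in the section above maps directly onto struct bpf_insn from the UAPI headers. As a sketch (illustrative, not part of the patches), one instruction, BPF_ALU64 | BPF_ADD | BPF_X ("dst += src"), can be assembled by hand:

        #include <linux/bpf.h>

        /* The opcode byte is the LSB; dst and src registers share the
         * next byte as two 4-bit fields; the 16-bit offset and 32-bit
         * immediate follow and are unused (zeroed) for this operation. */
        struct bpf_insn add_r2_r3 = {
                .code    = BPF_ALU64 | BPF_ADD | BPF_X,
                .dst_reg = BPF_REG_2,
                .src_reg = BPF_REG_3,
                .off     = 0,
                .imm     = 0,
        };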
+ +Signed-off-by: Christoph Hellwig +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220103183556.41040-3-hch@lst.de +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/instruction-set.rst | 52 ++++++++++++++++++++-------------- + 1 file changed, 32 insertions(+), 20 deletions(-) + +--- a/Documentation/bpf/instruction-set.rst ++++ b/Documentation/bpf/instruction-set.rst +@@ -74,7 +74,13 @@ The 4th bit encodes the source operand: + + The four MSB bits store the operation code. + +-For class BPF_ALU or BPF_ALU64: ++ ++Arithmetic instructions ++----------------------- ++ ++BPF_ALU uses 32-bit wide operands while BPF_ALU64 uses 64-bit wide operands for ++otherwise identical operations. ++The code field encodes the operation as below: + + ======== ===== ========================= + code value description +@@ -95,7 +101,29 @@ For class BPF_ALU or BPF_ALU64: + BPF_END 0xd0 endianness conversion + ======== ===== ========================= + +-For class BPF_JMP or BPF_JMP32: ++BPF_ADD | BPF_X | BPF_ALU means:: ++ ++ dst_reg = (u32) dst_reg + (u32) src_reg; ++ ++BPF_ADD | BPF_X | BPF_ALU64 means:: ++ ++ dst_reg = dst_reg + src_reg ++ ++BPF_XOR | BPF_K | BPF_ALU means:: ++ ++ src_reg = (u32) src_reg ^ (u32) imm32 ++ ++BPF_XOR | BPF_K | BPF_ALU64 means:: ++ ++ src_reg = src_reg ^ imm32 ++ ++ ++Jump instructions ++----------------- ++ ++BPF_JMP32 uses 32-bit wide operands while BPF_JMP uses 64-bit wide operands for ++otherwise identical operations. ++The code field encodes the operation as below: + + ======== ===== ========================= + code value description +@@ -116,24 +144,8 @@ For class BPF_JMP or BPF_JMP32: + BPF_JSLE 0xd0 signed '<=' + ======== ===== ========================= + +-So BPF_ADD | BPF_X | BPF_ALU means:: +- +- dst_reg = (u32) dst_reg + (u32) src_reg; +- +-Similarly, BPF_XOR | BPF_K | BPF_ALU means:: +- +- src_reg = (u32) src_reg ^ (u32) imm32 +- +-eBPF is using BPF_MOV | BPF_X | BPF_ALU to represent A = B moves. BPF_ALU64 +-is used to mean exactly the same operations as BPF_ALU, but with 64-bit wide +-operands instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.:: +- +- dst_reg = dst_reg + src_reg +- +-BPF_JMP | BPF_EXIT means function exit only. The eBPF program needs to store +-the return value into register R0 before doing a BPF_EXIT. Class 6 is used as +-BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide +-operands for the comparisons instead. ++The eBPF program needs to store the return value into register R0 before doing a ++BPF_EXIT. 
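The 32-/64-bit split the new subsections spell out can be made concrete with a small C model of the two BPF_ADD variants (a sketch of the semantics only, not kernel code):

        #include <stdint.h>

        /* BPF_ALU: compute in 32 bits, zero-extend into the 64-bit
         * register, exactly the (u32) casts shown in the text above. */
        static uint64_t alu32_add(uint64_t dst, uint64_t src)
        {
                return (uint32_t)((uint32_t)dst + (uint32_t)src);
        }

        /* BPF_ALU64: the same operation at full register width. */
        static uint64_t alu64_add(uint64_t dst, uint64_t src)
        {
                return dst + src;
        }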
+ + + Load and store instructions diff --git a/patches.suse/bpf-docs-Change-underline-in-btf-to-match-style-guid.patch b/patches.suse/bpf-docs-Change-underline-in-btf-to-match-style-guid.patch new file mode 100644 index 0000000..a0b6bde --- /dev/null +++ b/patches.suse/bpf-docs-Change-underline-in-btf-to-match-style-guid.patch @@ -0,0 +1,203 @@ +From: Dave Tucker +Date: Fri, 12 Nov 2021 21:17:22 +0000 +Subject: bpf, docs: Change underline in btf to match style guide +Patch-mainline: v5.17-rc1 +Git-commit: 3ff36bffaf3545d46e7dedcd8b89e62591de246d +References: jsc#PED-1368 + +This changes the type of underline used to follow the guidelines in +Documentation/doc-guide/sphinx.rst which also ensures that the headings +are rendered at the correct level in the HTML sidebar + +Signed-off-by: Dave Tucker +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/981b27485cc294206480df36fca46817e2553e39.1636749493.git.dave@dtucker.co.uk +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/btf.rst | 44 ++++++++++++++++++++++---------------------- + 1 file changed, 22 insertions(+), 22 deletions(-) + +--- a/Documentation/bpf/btf.rst ++++ b/Documentation/bpf/btf.rst +@@ -3,7 +3,7 @@ BPF Type Format (BTF) + ===================== + + 1. Introduction +-*************** ++=============== + + BTF (BPF Type Format) is the metadata format which encodes the debug info + related to BPF program/map. The name BTF was used initially to describe data +@@ -30,7 +30,7 @@ sections are discussed in details in :re + .. _BTF_Type_String: + + 2. BTF Type and String Encoding +-******************************* ++=============================== + + The file ``include/uapi/linux/btf.h`` provides high-level definition of how + types/strings are encoded. +@@ -57,13 +57,13 @@ little-endian target. The ``btf_header`` + generated. + + 2.1 String Encoding +-=================== ++------------------- + + The first string in the string section must be a null string. The rest of + string table is a concatenation of other null-terminated strings. + + 2.2 Type Encoding +-================= ++----------------- + + The type id ``0`` is reserved for ``void`` type. The type section is parsed + sequentially and type id is assigned to each recognized type starting from id +@@ -504,7 +504,7 @@ valid index (starting from 0) pointing t + * ``type``: the type with ``btf_type_tag`` attribute + + 3. BTF Kernel API +-***************** ++================= + + The following bpf syscall command involves BTF: + * BPF_BTF_LOAD: load a blob of BTF data into kernel +@@ -547,14 +547,14 @@ The workflow typically looks like: + + + 3.1 BPF_BTF_LOAD +-================ ++---------------- + + Load a blob of BTF data into kernel. A blob of data, described in + :ref:`BTF_Type_String`, can be directly loaded into the kernel. A ``btf_fd`` + is returned to a userspace. + + 3.2 BPF_MAP_CREATE +-================== ++------------------ + + A map can be created with ``btf_fd`` and specified key/value type id.:: + +@@ -581,7 +581,7 @@ automatically. + .. _BPF_Prog_Load: + + 3.3 BPF_PROG_LOAD +-================= ++----------------- + + During prog_load, func_info and line_info can be passed to kernel with proper + values for the following attributes: +@@ -631,7 +631,7 @@ For line_info, the line number and colum + #define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) + + 3.4 BPF_{PROG,MAP}_GET_NEXT_ID +-============================== ++------------------------------ + + In kernel, every loaded program, map or btf has a unique id. 
The id won't + change during the lifetime of a program, map, or btf. +@@ -641,13 +641,13 @@ each command, to user space, for bpf pro + inspection tool can inspect all programs and maps. + + 3.5 BPF_{PROG,MAP}_GET_FD_BY_ID +-=============================== ++------------------------------- + + An introspection tool cannot use id to get details about program or maps. + A file descriptor needs to be obtained first for reference-counting purpose. + + 3.6 BPF_OBJ_GET_INFO_BY_FD +-========================== ++-------------------------- + + Once a program/map fd is acquired, an introspection tool can get the detailed + information from kernel about this fd, some of which are BTF-related. For +@@ -656,7 +656,7 @@ example, ``bpf_map_info`` returns ``btf_ + bpf byte codes, and jited_line_info. + + 3.7 BPF_BTF_GET_FD_BY_ID +-======================== ++------------------------ + + With ``btf_id`` obtained in ``bpf_map_info`` and ``bpf_prog_info``, bpf + syscall command BPF_BTF_GET_FD_BY_ID can retrieve a btf fd. Then, with +@@ -668,10 +668,10 @@ tool has full btf knowledge and is able + func signatures and line info, along with byte/jit codes. + + 4. ELF File Format Interface +-**************************** ++============================ + + 4.1 .BTF section +-================ ++---------------- + + The .BTF section contains type and string data. The format of this section is + same as the one describe in :ref:`BTF_Type_String`. +@@ -679,7 +679,7 @@ same as the one describe in :ref:`BTF_Ty + .. _BTF_Ext_Section: + + 4.2 .BTF.ext section +-==================== ++-------------------- + + The .BTF.ext section encodes func_info and line_info which needs loader + manipulation before loading into the kernel. +@@ -743,7 +743,7 @@ bpf_insn``. For ELF API, the ``insn_off` + beginning of section (``btf_ext_info_sec->sec_name_off``). + + 4.2 .BTF_ids section +-==================== ++-------------------- + + The .BTF_ids section encodes BTF ID values that are used within the kernel. + +@@ -804,10 +804,10 @@ All the BTF ID lists and sets are compil + resolved during the linking phase of kernel build by ``resolve_btfids`` tool. + + 5. Using BTF +-************ ++============ + + 5.1 bpftool map pretty print +-============================ ++---------------------------- + + With BTF, the map key/value can be printed based on fields rather than simply + raw bytes. This is especially valuable for large structure or if your data +@@ -849,7 +849,7 @@ bpftool is able to pretty print like bel + ] + + 5.2 bpftool prog dump +-===================== ++--------------------- + + The following is an example showing how func_info and line_info can help prog + dump with better kernel symbol names, function prototypes and line +@@ -883,7 +883,7 @@ information.:: + [...] + + 5.3 Verifier Log +-================ ++---------------- + + The following is an example of how line_info can help debugging verification + failure.:: +@@ -909,7 +909,7 @@ failure.:: + R2 offset is outside of the packet + + 6. BTF Generation +-***************** ++================= + + You need latest pahole + +@@ -1016,6 +1016,6 @@ format.:: + .long 8206 # Line 8 Col 14 + + 7. Testing +-********** ++========== + + Kernel bpf selftest `test_btf.c` provides extensive set of BTF-related tests. 
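Tying the BTF documentation above back to the BTF_KIND_TYPE_TAG support added earlier in this series, a sketch of what the new kind looks like from C source. It assumes a clang new enough (14+) to honor the attribute; the tag macro and struct are illustrative, not from the patches:

        #define __tag_user __attribute__((btf_type_tag("user")))

        /* With -g, clang records the "user" tag in BTF as the chain
         * PTR -> TYPE_TAG("user") -> INT instead of dropping it, so a
         * BTF-aware verifier can insist on bpf_probe_read_user() when
         * this pointer is dereferenced. */
        struct example_ctx {
                int __tag_user *uptr;
        };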
diff --git a/patches.suse/bpf-docs-Document-the-opcode-classes.patch b/patches.suse/bpf-docs-Document-the-opcode-classes.patch new file mode 100644 index 0000000..0d32359 --- /dev/null +++ b/patches.suse/bpf-docs-Document-the-opcode-classes.patch @@ -0,0 +1,50 @@ +From: Christoph Hellwig +Date: Mon, 3 Jan 2022 19:35:53 +0100 +Subject: bpf, docs: Document the opcode classes +Patch-mainline: v5.17-rc1 +Git-commit: 894cda554c3c3dc836f3cc873c47a465ba9433b4 +References: jsc#PED-1368 + +Add a description for each opcode class. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220103183556.41040-4-hch@lst.de +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/instruction-set.rst | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +--- a/Documentation/bpf/instruction-set.rst ++++ b/Documentation/bpf/instruction-set.rst +@@ -38,18 +38,18 @@ Instruction classes + + The three LSB bits of the 'opcode' field store the instruction class: + +- ========= ===== +- class value +- ========= ===== +- BPF_LD 0x00 +- BPF_LDX 0x01 +- BPF_ST 0x02 +- BPF_STX 0x03 +- BPF_ALU 0x04 +- BPF_JMP 0x05 +- BPF_JMP32 0x06 +- BPF_ALU64 0x07 +- ========= ===== ++ ========= ===== =============================== ++ class value description ++ ========= ===== =============================== ++ BPF_LD 0x00 non-standard load operations ++ BPF_LDX 0x01 load into register operations ++ BPF_ST 0x02 store from immediate operations ++ BPF_STX 0x03 store from register operations ++ BPF_ALU 0x04 32-bit arithmetic operations ++ BPF_JMP 0x05 64-bit jump operations ++ BPF_JMP32 0x06 32-bit jump operations ++ BPF_ALU64 0x07 64-bit arithmetic operations ++ ========= ===== =============================== + + Arithmetic and jump instructions + ================================ diff --git a/patches.suse/bpf-docs-Fix-ordering-of-bpf-documentation.patch b/patches.suse/bpf-docs-Fix-ordering-of-bpf-documentation.patch new file mode 100644 index 0000000..d1fd50b --- /dev/null +++ b/patches.suse/bpf-docs-Fix-ordering-of-bpf-documentation.patch @@ -0,0 +1,306 @@ +From: Dave Tucker +Date: Fri, 12 Nov 2021 21:17:24 +0000 +Subject: bpf, docs: Fix ordering of bpf documentation +Patch-mainline: v5.17-rc1 +Git-commit: 5931d9a3d0529dc803c792a10e52f0de1d0b9991 +References: jsc#PED-1368 + +This commit fixes the display of the BPF documentation in the sidebar +when rendered as HTML. + +Before this patch, the sidebar would render as follows for some +sections: + +| BPF Documentation + |- BPF Type Format (BTF) + |- BPF Type Format (BTF) + +This was due to creating a heading in index.rst followed by +a sphinx toctree, where the file referenced carries the same +title as the section heading. + +To fix this I applied a pattern that has been established in other +subfolders of Documentation: + +1. Re-wrote index.rst to have a single toctree +2. Split the sections out in to their own files + +Additionally maps.rst and programs.rst make use of a glob pattern to +include map_* or prog_* rst files in their toctree, meaning future map +or program type documentation will be automatically included. 
+ +Signed-off-by: Dave Tucker +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/1a1eed800e7b9dc13b458de113a489641519b0cc.1636749493.git.dave@dtucker.co.uk +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/faq.rst | 11 ++++ + Documentation/bpf/helpers.rst | 7 +++ + Documentation/bpf/index.rst | 97 ++++-------------------------- + Documentation/bpf/libbpf/index.rst | 4 +- + Documentation/bpf/maps.rst | 9 +++ + Documentation/bpf/other.rst | 9 +++ + Documentation/bpf/programs.rst | 9 +++ + Documentation/bpf/syscall_api.rst | 11 ++++ + Documentation/bpf/test_debug.rst | 9 +++ + 9 files changed, 80 insertions(+), 86 deletions(-) + create mode 100644 Documentation/bpf/faq.rst + create mode 100644 Documentation/bpf/helpers.rst + create mode 100644 Documentation/bpf/maps.rst + create mode 100644 Documentation/bpf/other.rst + create mode 100644 Documentation/bpf/programs.rst + create mode 100644 Documentation/bpf/syscall_api.rst + create mode 100644 Documentation/bpf/test_debug.rst + +diff --git a/Documentation/bpf/faq.rst b/Documentation/bpf/faq.rst +new file mode 100644 +index 000000000000..a622602ce9ad +--- /dev/null ++++ b/Documentation/bpf/faq.rst +@@ -0,0 +1,11 @@ ++================================ ++Frequently asked questions (FAQ) ++================================ ++ ++Two sets of Questions and Answers (Q&A) are maintained. ++ ++.. toctree:: ++ :maxdepth: 1 ++ ++ bpf_design_QA ++ bpf_devel_QA +diff --git a/Documentation/bpf/helpers.rst b/Documentation/bpf/helpers.rst +new file mode 100644 +index 000000000000..c4ee0cc20dec +--- /dev/null ++++ b/Documentation/bpf/helpers.rst +@@ -0,0 +1,7 @@ ++Helper functions ++================ ++ ++* `bpf-helpers(7)`_ maintains a list of helpers available to eBPF programs. ++ ++.. Links ++.. _bpf-helpers(7): https://man7.org/linux/man-pages/man7/bpf-helpers.7.html +\ No newline at end of file +diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst +index 37f273a7e8b6..413f50101eca 100644 +--- a/Documentation/bpf/index.rst ++++ b/Documentation/bpf/index.rst +@@ -12,97 +12,26 @@ BPF instruction-set. + The Cilium project also maintains a `BPF and XDP Reference Guide`_ + that goes into great technical depth about the BPF Architecture. + +-libbpf +-====== +- +-Documentation/bpf/libbpf/libbpf.rst is a userspace library for loading and interacting with bpf programs. +- +-BPF Type Format (BTF) +-===================== +- + .. toctree:: + :maxdepth: 1 + ++ libbpf/index + btf +- +- +-Frequently asked questions (FAQ) +-================================ +- +-Two sets of Questions and Answers (Q&A) are maintained. +- +-.. toctree:: +- :maxdepth: 1 +- +- bpf_design_QA +- bpf_devel_QA +- +-Syscall API +-=========== +- +-The primary info for the bpf syscall is available in the `man-pages`_ +-for `bpf(2)`_. For more information about the userspace API, see +-Documentation/userspace-api/ebpf/index.rst. +- +-Helper functions +-================ +- +-* `bpf-helpers(7)`_ maintains a list of helpers available to eBPF programs. +- +- +-Program types +-============= +- +-.. toctree:: +- :maxdepth: 1 +- +- prog_cgroup_sockopt +- prog_cgroup_sysctl +- prog_flow_dissector +- bpf_lsm +- prog_sk_lookup +- +- +-Map types +-========= +- +-.. toctree:: +- :maxdepth: 1 +- +- map_cgroup_storage +- +- +-Testing and debugging BPF +-========================= +- +-.. toctree:: +- :maxdepth: 1 +- +- drgn +- s390 +- +- +-Licensing +-========= +- +-.. 
toctree:: +- :maxdepth: 1 +- ++ faq ++ syscall_api ++ helpers ++ programs ++ maps + bpf_licensing ++ test_debug ++ other + ++.. only:: subproject and html + +-Other +-===== ++ Indices ++ ======= + +-.. toctree:: +- :maxdepth: 1 +- +- ringbuf +- llvm_reloc ++ * :ref:`genindex` + + .. Links: +-.. _networking-filter: ../networking/filter.rst +-.. _man-pages: https://www.kernel.org/doc/man-pages/ +-.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html +-.. _bpf-helpers(7): https://man7.org/linux/man-pages/man7/bpf-helpers.7.html +-.. _BPF and XDP Reference Guide: https://docs.cilium.io/en/latest/bpf/ ++.. _BPF and XDP Reference Guide: https://docs.cilium.io/en/latest/bpf/ +\ No newline at end of file +diff --git a/Documentation/bpf/libbpf/index.rst b/Documentation/bpf/libbpf/index.rst +index 4f8adfc3ab83..4e8c656b539a 100644 +--- a/Documentation/bpf/libbpf/index.rst ++++ b/Documentation/bpf/libbpf/index.rst +@@ -3,8 +3,6 @@ + libbpf + ====== + +-For API documentation see the `versioned API documentation site `_. +- + .. toctree:: + :maxdepth: 1 + +@@ -14,6 +12,8 @@ For API documentation see the `versioned API documentation site `_. ++ + All general BPF questions, including kernel functionality, libbpf APIs and + their application, should be sent to bpf@vger.kernel.org mailing list. + You can `subscribe `_ to the +diff --git a/Documentation/bpf/maps.rst b/Documentation/bpf/maps.rst +new file mode 100644 +index 000000000000..2084b0e7cde8 +--- /dev/null ++++ b/Documentation/bpf/maps.rst +@@ -0,0 +1,9 @@ ++========= ++Map Types ++========= ++ ++.. toctree:: ++ :maxdepth: 1 ++ :glob: ++ ++ map_* +\ No newline at end of file +diff --git a/Documentation/bpf/other.rst b/Documentation/bpf/other.rst +new file mode 100644 +index 000000000000..3d61963403b4 +--- /dev/null ++++ b/Documentation/bpf/other.rst +@@ -0,0 +1,9 @@ ++===== ++Other ++===== ++ ++.. toctree:: ++ :maxdepth: 1 ++ ++ ringbuf ++ llvm_reloc +\ No newline at end of file +diff --git a/Documentation/bpf/programs.rst b/Documentation/bpf/programs.rst +new file mode 100644 +index 000000000000..620eb667ac7a +--- /dev/null ++++ b/Documentation/bpf/programs.rst +@@ -0,0 +1,9 @@ ++============= ++Program Types ++============= ++ ++.. toctree:: ++ :maxdepth: 1 ++ :glob: ++ ++ prog_* +diff --git a/Documentation/bpf/syscall_api.rst b/Documentation/bpf/syscall_api.rst +new file mode 100644 +index 000000000000..f0a1dff087ad +--- /dev/null ++++ b/Documentation/bpf/syscall_api.rst +@@ -0,0 +1,11 @@ ++=========== ++Syscall API ++=========== ++ ++The primary info for the bpf syscall is available in the `man-pages`_ ++for `bpf(2)`_. For more information about the userspace API, see ++Documentation/userspace-api/ebpf/index.rst. ++ ++.. Links: ++.. _man-pages: https://www.kernel.org/doc/man-pages/ ++.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html +\ No newline at end of file +diff --git a/Documentation/bpf/test_debug.rst b/Documentation/bpf/test_debug.rst +new file mode 100644 +index 000000000000..ebf0caceb6a6 +--- /dev/null ++++ b/Documentation/bpf/test_debug.rst +@@ -0,0 +1,9 @@ ++========================= ++Testing and debugging BPF ++========================= ++ ++.. 
toctree:: ++ :maxdepth: 1 ++ ++ drgn ++ s390 +-- +2.38.1 + diff --git a/patches.suse/bpf-docs-Fix-verifier-references.patch b/patches.suse/bpf-docs-Fix-verifier-references.patch new file mode 100644 index 0000000..75c243a --- /dev/null +++ b/patches.suse/bpf-docs-Fix-verifier-references.patch @@ -0,0 +1,45 @@ +From: Christoph Hellwig +Date: Thu, 23 Dec 2021 11:19:03 +0100 +Subject: bpf, docs: Fix verifier references +Patch-mainline: v5.17-rc1 +Git-commit: fa86aa77d4da211fc814325cdb3a572b1b851058 +References: jsc#PED-1368 + +Use normal RST file reference instead of linkage copied from the old filter.rst +document that does not actually work when using HTML output. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211223101906.977624-2-hch@lst.de +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/instruction-set.rst | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +--- a/Documentation/bpf/instruction-set.rst ++++ b/Documentation/bpf/instruction-set.rst +@@ -203,7 +203,7 @@ Some core changes of the eBPF format fro + bpf_exit + + After the call the registers R1-R5 contain junk values and cannot be read. +- An in-kernel `eBPF verifier`_ is used to validate eBPF programs. ++ An in-kernel verifier.rst is used to validate eBPF programs. + + Also in the new design, eBPF is limited to 4096 insns, which means that any + program will terminate quickly and will only call a fixed number of kernel +@@ -234,7 +234,7 @@ optimizations, socket filters and seccom + filters may use it as assembler to generate code from kernel. In kernel usage + may not be bounded by security considerations, since generated eBPF code + may be optimizing internal code path and not being exposed to the user space. +-Safety of eBPF can come from the `eBPF verifier`_. In such use cases as ++Safety of eBPF can come from the verifier.rst. In such use cases as + described, it may be used as safe instruction set. + + Just like the original BPF, eBPF runs within a controlled environment, +@@ -462,6 +462,3 @@ of two consecutive ``struct bpf_insn`` 8 + instruction that loads 64-bit immediate value into a dst_reg. + Classic BPF has similar instruction: ``BPF_LD | BPF_W | BPF_IMM`` which loads + 32-bit immediate value into a register. +- +-.. Links: +-.. _eBPF verifier: verifiers.rst diff --git a/patches.suse/bpf-docs-Fully-document-the-ALU-opcodes.patch b/patches.suse/bpf-docs-Fully-document-the-ALU-opcodes.patch new file mode 100644 index 0000000..2099ea2 --- /dev/null +++ b/patches.suse/bpf-docs-Fully-document-the-ALU-opcodes.patch @@ -0,0 +1,60 @@ +From: Christoph Hellwig +Date: Mon, 3 Jan 2022 19:35:54 +0100 +Subject: bpf, docs: Fully document the ALU opcodes +Patch-mainline: v5.17-rc1 +Git-commit: 03c517ee9eedd95472c36c6291fc97368b48c9e4 +References: jsc#PED-1368 + +Add pseudo-code to document all the different BPF_ALU / BPF_ALU64 +opcodes. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220103183556.41040-5-hch@lst.de +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/instruction-set.rst | 30 +++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +--- a/Documentation/bpf/instruction-set.rst ++++ b/Documentation/bpf/instruction-set.rst +@@ -82,24 +82,24 @@ BPF_ALU uses 32-bit wide operands while + otherwise identical operations. 
+ The code field encodes the operation as below:
+
+- ======== ===== =========================
++ ======== ===== ==========================
+ code value description
+- ======== ===== =========================
+- BPF_ADD 0x00
+- BPF_SUB 0x10
+- BPF_MUL 0x20
+- BPF_DIV 0x30
+- BPF_OR 0x40
+- BPF_AND 0x50
+- BPF_LSH 0x60
+- BPF_RSH 0x70
+- BPF_NEG 0x80
+- BPF_MOD 0x90
+- BPF_XOR 0xa0
+- BPF_MOV 0xb0 mov reg to reg
++ ======== ===== ==========================
++ BPF_ADD 0x00 dst += src
++ BPF_SUB 0x10 dst -= src
++ BPF_MUL 0x20 dst \*= src
++ BPF_DIV 0x30 dst /= src
++ BPF_OR 0x40 dst \|= src
++ BPF_AND 0x50 dst &= src
++ BPF_LSH 0x60 dst <<= src
++ BPF_RSH 0x70 dst >>= src
++ BPF_NEG 0x80 dst = ~src
++ BPF_MOD 0x90 dst %= src
++ BPF_XOR 0xa0 dst ^= src
++ BPF_MOV 0xb0 dst = src
+ BPF_ARSH 0xc0 sign extending shift right
+ BPF_END 0xd0 endianness conversion
+- ======== ===== =========================
++ ======== ===== ==========================
+
+ BPF_ADD | BPF_X | BPF_ALU means::
+
diff --git a/patches.suse/bpf-docs-Fully-document-the-JMP-mode-modifiers.patch b/patches.suse/bpf-docs-Fully-document-the-JMP-mode-modifiers.patch
new file mode 100644
index 0000000..fe480e5
--- /dev/null
+++ b/patches.suse/bpf-docs-Fully-document-the-JMP-mode-modifiers.patch
@@ -0,0 +1,41 @@
+From: Christoph Hellwig
+Date: Mon, 3 Jan 2022 19:35:56 +0100
+Subject: bpf, docs: Fully document the JMP mode modifiers
+Patch-mainline: v5.17-rc1
+Git-commit: 58d8a3fc4a40dcfebf333ab2dc2c7c338249be51
+References: jsc#PED-1368
+
+Add a description for all the modifiers.
+
+Signed-off-by: Christoph Hellwig
+Signed-off-by: Alexei Starovoitov
+Link: https://lore.kernel.org/bpf/20220103183556.41040-7-hch@lst.de
+Acked-by: Shung-Hsi Yu
+---
+ Documentation/bpf/instruction-set.rst | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/Documentation/bpf/instruction-set.rst
++++ b/Documentation/bpf/instruction-set.rst
+@@ -173,15 +173,15 @@ The size modifier is one of:
+
+ The mode modifier is one of:
+
+- ============= ===== =====================
++ ============= ===== ====================================
+ mode modifier value description
+- ============= ===== =====================
++ ============= ===== ====================================
+ BPF_IMM 0x00 used for 64-bit mov
+- BPF_ABS 0x20
+- BPF_IND 0x40
+- BPF_MEM 0x60
++ BPF_ABS 0x20 legacy BPF packet access
++ BPF_IND 0x40 legacy BPF packet access
++ BPF_MEM 0x60 all normal load and store operations
+ BPF_ATOMIC 0xc0 atomic operations
+- ============= ===== =====================
++ ============= ===== ====================================
+
+ BPF_MEM | | BPF_STX means::
+
diff --git a/patches.suse/bpf-docs-Fully-document-the-JMP-opcodes.patch b/patches.suse/bpf-docs-Fully-document-the-JMP-opcodes.patch
new file mode 100644
index 0000000..142fc2b
--- /dev/null
+++ b/patches.suse/bpf-docs-Fully-document-the-JMP-opcodes.patch
@@ -0,0 +1,62 @@
+From: Christoph Hellwig
+Date: Mon, 3 Jan 2022 19:35:55 +0100
+Subject: bpf, docs: Fully document the JMP opcodes
+Patch-mainline: v5.17-rc1
+Git-commit: 9e533e22b5700097e84b8a841d9e1c251cc132c6
+References: jsc#PED-1368
+
+Add pseudo-code to document all the different BPF_JMP / BPF_JMP32
+opcodes.
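
The "PC += off if ..." pseudo-code used in the table below can be read as the
following interpreter-style C sketch (an editorial illustration, not part of
this patch; the decoded ``insn`` layout and register-file names are invented)::

    #include <stdint.h>

    struct insn { uint8_t code; uint8_t dst, src; int16_t off; };

    static void jmp_step(uint64_t regs[], const struct insn **pc)
    {
        uint64_t dst = regs[(*pc)->dst], src = regs[(*pc)->src];
        int16_t off = (*pc)->off;

        switch ((*pc)->code & 0xf0) {        /* BPF_OP(code) */
        case 0x20:                           /* BPF_JGT, unsigned */
            if (dst > src)
                *pc += off;
            break;
        case 0x60:                           /* BPF_JSGT, signed */
            if ((int64_t)dst > (int64_t)src)
                *pc += off;
            break;
        /* ... the remaining opcodes follow the same pattern ... */
        }
        (*pc)++;    /* 'off' is relative to the next instruction */
    }
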
+ +Signed-off-by: Christoph Hellwig +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220103183556.41040-6-hch@lst.de +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/instruction-set.rst | 34 +++++++++++++++++----------------- + 1 file changed, 17 insertions(+), 17 deletions(-) + +--- a/Documentation/bpf/instruction-set.rst ++++ b/Documentation/bpf/instruction-set.rst +@@ -125,24 +125,24 @@ BPF_JMP32 uses 32-bit wide operands whil + otherwise identical operations. + The code field encodes the operation as below: + +- ======== ===== ========================= +- code value description +- ======== ===== ========================= +- BPF_JA 0x00 BPF_JMP only +- BPF_JEQ 0x10 +- BPF_JGT 0x20 +- BPF_JGE 0x30 +- BPF_JSET 0x40 +- BPF_JNE 0x50 jump '!=' +- BPF_JSGT 0x60 signed '>' +- BPF_JSGE 0x70 signed '>=' ++ ======== ===== ========================= ============ ++ code value description notes ++ ======== ===== ========================= ============ ++ BPF_JA 0x00 PC += off BPF_JMP only ++ BPF_JEQ 0x10 PC += off if dst == src ++ BPF_JGT 0x20 PC += off if dst > src unsigned ++ BPF_JGE 0x30 PC += off if dst >= src unsigned ++ BPF_JSET 0x40 PC += off if dst & src ++ BPF_JNE 0x50 PC += off if dst != src ++ BPF_JSGT 0x60 PC += off if dst > src signed ++ BPF_JSGE 0x70 PC += off if dst >= src signed + BPF_CALL 0x80 function call +- BPF_EXIT 0x90 function return +- BPF_JLT 0xa0 unsigned '<' +- BPF_JLE 0xb0 unsigned '<=' +- BPF_JSLT 0xc0 signed '<' +- BPF_JSLE 0xd0 signed '<=' +- ======== ===== ========================= ++ BPF_EXIT 0x90 function / program return BPF_JMP only ++ BPF_JLT 0xa0 PC += off if dst < src unsigned ++ BPF_JLE 0xb0 PC += off if dst <= src unsigned ++ BPF_JSLT 0xc0 PC += off if dst < src signed ++ BPF_JSLE 0xd0 PC += off if dst <= src signed ++ ======== ===== ========================= ============ + + The eBPF program needs to store the return value into register R0 before doing a + BPF_EXIT. diff --git a/patches.suse/bpf-docs-Generate-nicer-tables-for-instruction-encod.patch b/patches.suse/bpf-docs-Generate-nicer-tables-for-instruction-encod.patch new file mode 100644 index 0000000..52fde53 --- /dev/null +++ b/patches.suse/bpf-docs-Generate-nicer-tables-for-instruction-encod.patch @@ -0,0 +1,247 @@ +From: Christoph Hellwig +Date: Thu, 23 Dec 2021 11:19:05 +0100 +Subject: bpf, docs: Generate nicer tables for instruction encodings +Patch-mainline: v5.17-rc1 +Git-commit: 5e4dd19f00491faf10912e1f15a47ab010e0e9ce +References: jsc#PED-1368 + +Use RST tables that are nicely readable both in plain ascii as well as +in html to render the instruction encodings, and add a few subheadings +to better structure the text. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211223101906.977624-4-hch@lst.de +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/instruction-set.rst | 168 ++++++++++++++++++++-------------- + 1 file changed, 100 insertions(+), 68 deletions(-) + +--- a/Documentation/bpf/instruction-set.rst ++++ b/Documentation/bpf/instruction-set.rst +@@ -19,19 +19,10 @@ The eBPF calling convention is defined a + R0 - R5 are scratch registers and eBPF programs needs to spill/fill them if + necessary across calls. 
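
As a sketch of this convention in practice (assuming the instruction-building
macros from tools/include/linux/filter.h; the helper picked here is arbitrary),
a program that must keep its context across a call parks it in callee-saved R6::

    #include <linux/bpf.h>
    #include <linux/filter.h>

    struct bpf_insn prog[] = {
        /* R1 holds ctx on entry; park it in callee-saved R6 */
        BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
        /* the call clobbers R1-R5 and leaves its result in R0 */
        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
                     BPF_FUNC_get_prandom_u32),
        /* ctx survives in R6 and can be moved back for the next call */
        BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
        BPF_EXIT_INSN(),                     /* return value is R0 */
    };
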
+ +-eBPF opcode encoding +-==================== ++Instruction classes ++=================== + +-For arithmetic and jump instructions the 8-bit 'opcode' field is divided into +-three parts:: +- +- +----------------+--------+--------------------+ +- | 4 bits | 1 bit | 3 bits | +- | operation code | source | instruction class | +- +----------------+--------+--------------------+ +- (MSB) (LSB) +- +-Three LSB bits store instruction class which is one of: ++The three LSB bits of the 'opcode' field store the instruction class: + + ========= ===== + class value +@@ -46,17 +37,34 @@ Three LSB bits store instruction class w + BPF_ALU64 0x07 + ========= ===== + +-When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ... +- +-:: +- +- BPF_K 0x00 /* use 32-bit immediate as source operand */ +- BPF_X 0x08 /* use 'src_reg' register as source operand */ +- +-... and four MSB bits store operation code. ++Arithmetic and jump instructions ++================================ + +-If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 BPF_OP(code) is one of:: ++For arithmetic and jump instructions (BPF_ALU, BPF_ALU64, BPF_JMP and ++BPF_JMP32), the 8-bit 'opcode' field is divided into three parts: + ++ ============== ====== ================= ++ 4 bits (MSB) 1 bit 3 bits (LSB) ++ ============== ====== ================= ++ operation code source instruction class ++ ============== ====== ================= ++ ++The 4th bit encodes the source operand: ++ ++ ====== ===== ======================================== ++ source value description ++ ====== ===== ======================================== ++ BPF_K 0x00 use 32-bit immediate as source operand ++ BPF_X 0x08 use 'src_reg' register as source operand ++ ====== ===== ======================================== ++ ++The four MSB bits store the operation code. 
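
A small sketch of decoding these three parts with the uapi helper macros
(``BPF_CLASS``, ``BPF_SRC`` and ``BPF_OP`` live in ``linux/bpf_common.h``,
which ``linux/bpf.h`` pulls in)::

    #include <stdio.h>
    #include <linux/bpf.h>

    int main(void)
    {
        unsigned char code = BPF_ALU64 | BPF_X | BPF_ADD;

        printf("class=0x%02x src=0x%02x op=0x%02x\n",
               BPF_CLASS(code),   /* 0x07, BPF_ALU64 */
               BPF_SRC(code),     /* 0x08, BPF_X: source is src_reg */
               BPF_OP(code));     /* 0x00, BPF_ADD */
        return 0;
    }
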
++ ++For class BPF_ALU or BPF_ALU64: ++ ++ ======== ===== ========================= ++ code value description ++ ======== ===== ========================= + BPF_ADD 0x00 + BPF_SUB 0x10 + BPF_MUL 0x20 +@@ -68,26 +76,31 @@ If BPF_CLASS(code) == BPF_ALU or BPF_ALU + BPF_NEG 0x80 + BPF_MOD 0x90 + BPF_XOR 0xa0 +- BPF_MOV 0xb0 /* mov reg to reg */ +- BPF_ARSH 0xc0 /* sign extending shift right */ +- BPF_END 0xd0 /* endianness conversion */ +- +-If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 BPF_OP(code) is one of:: +- +- BPF_JA 0x00 /* BPF_JMP only */ ++ BPF_MOV 0xb0 mov reg to reg ++ BPF_ARSH 0xc0 sign extending shift right ++ BPF_END 0xd0 endianness conversion ++ ======== ===== ========================= ++ ++For class BPF_JMP or BPF_JMP32: ++ ++ ======== ===== ========================= ++ code value description ++ ======== ===== ========================= ++ BPF_JA 0x00 BPF_JMP only + BPF_JEQ 0x10 + BPF_JGT 0x20 + BPF_JGE 0x30 + BPF_JSET 0x40 +- BPF_JNE 0x50 /* jump != */ +- BPF_JSGT 0x60 /* signed '>' */ +- BPF_JSGE 0x70 /* signed '>=' */ +- BPF_CALL 0x80 /* function call */ +- BPF_EXIT 0x90 /* function return */ +- BPF_JLT 0xa0 /* unsigned '<' */ +- BPF_JLE 0xb0 /* unsigned '<=' */ +- BPF_JSLT 0xc0 /* signed '<' */ +- BPF_JSLE 0xd0 /* signed '<=' */ ++ BPF_JNE 0x50 jump '!=' ++ BPF_JSGT 0x60 signed '>' ++ BPF_JSGE 0x70 signed '>=' ++ BPF_CALL 0x80 function call ++ BPF_EXIT 0x90 function return ++ BPF_JLT 0xa0 unsigned '<' ++ BPF_JLE 0xb0 unsigned '<=' ++ BPF_JSLT 0xc0 signed '<' ++ BPF_JSLE 0xd0 signed '<=' ++ ======== ===== ========================= + + So BPF_ADD | BPF_X | BPF_ALU means:: + +@@ -108,37 +121,58 @@ the return value into register R0 before + BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide + operands for the comparisons instead. + +-For load and store instructions the 8-bit 'code' field is divided as:: + +- +--------+--------+-------------------+ +- | 3 bits | 2 bits | 3 bits | +- | mode | size | instruction class | +- +--------+--------+-------------------+ +- (MSB) (LSB) ++Load and store instructions ++=========================== ++ ++For load and store instructions (BPF_LD, BPF_LDX, BPF_ST and BPF_STX), the ++8-bit 'opcode' field is divided as: ++ ++ ============ ====== ================= ++ 3 bits (MSB) 2 bits 3 bits (LSB) ++ ============ ====== ================= ++ mode size instruction class ++ ============ ====== ================= ++ ++The size modifier is one of: + +-Size modifier is one of ... ++ ============= ===== ===================== ++ size modifier value description ++ ============= ===== ===================== ++ BPF_W 0x00 word (4 bytes) ++ BPF_H 0x08 half word (2 bytes) ++ BPF_B 0x10 byte ++ BPF_DW 0x18 double word (8 bytes) ++ ============= ===== ===================== + +-:: ++The mode modifier is one of: + +- BPF_W 0x00 /* word */ +- BPF_H 0x08 /* half word */ +- BPF_B 0x10 /* byte */ +- BPF_DW 0x18 /* double word */ ++ ============= ===== ===================== ++ mode modifier value description ++ ============= ===== ===================== ++ BPF_IMM 0x00 used for 64-bit mov ++ BPF_ABS 0x20 ++ BPF_IND 0x40 ++ BPF_MEM 0x60 ++ BPF_ATOMIC 0xc0 atomic operations ++ ============= ===== ===================== + +-... 
which encodes size of load/store operation:: ++BPF_MEM | | BPF_STX means:: + +- B - 1 byte +- H - 2 byte +- W - 4 byte +- DW - 8 byte ++ *(size *) (dst_reg + off) = src_reg + +-Mode modifier is one of:: ++BPF_MEM | | BPF_ST means:: + +- BPF_IMM 0x00 /* used for 64-bit mov */ +- BPF_ABS 0x20 +- BPF_IND 0x40 +- BPF_MEM 0x60 +- BPF_ATOMIC 0xc0 /* atomic operations */ ++ *(size *) (dst_reg + off) = imm32 ++ ++BPF_MEM | | BPF_LDX means:: ++ ++ dst_reg = *(size *) (src_reg + off) ++ ++Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. ++ ++Packet access instructions ++-------------------------- + + eBPF has two non-generic instructions: (BPF_ABS | | BPF_LD) and + (BPF_IND | | BPF_LD) which are used to access packet data. +@@ -165,15 +199,10 @@ For example:: + R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32)) + and R1 - R5 were scratched. + +-eBPF has generic load/store operations:: ++Atomic operations ++----------------- + +- BPF_MEM | | BPF_STX: *(size *) (dst_reg + off) = src_reg +- BPF_MEM | | BPF_ST: *(size *) (dst_reg + off) = imm32 +- BPF_MEM | | BPF_LDX: dst_reg = *(size *) (src_reg + off) +- +-Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. +- +-It also includes atomic operations, which use the immediate field for extra ++eBPF includes atomic operations, which use the immediate field for extra + encoding:: + + .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg +@@ -217,6 +246,9 @@ You may encounter ``BPF_XADD`` - this is + referring to the exclusive-add operation encoded when the immediate field is + zero. + ++16-byte instructions ++-------------------- ++ + eBPF has one 16-byte instruction: ``BPF_LD | BPF_DW | BPF_IMM`` which consists + of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single + instruction that loads 64-bit immediate value into a dst_reg. diff --git a/patches.suse/bpf-docs-Move-handling-of-maps-to-Documentation-bpf-.patch b/patches.suse/bpf-docs-Move-handling-of-maps-to-Documentation-bpf-.patch new file mode 100644 index 0000000..3114b31 --- /dev/null +++ b/patches.suse/bpf-docs-Move-handling-of-maps-to-Documentation-bpf-.patch @@ -0,0 +1,133 @@ +From: Christoph Hellwig +Date: Fri, 19 Nov 2021 17:32:14 +0100 +Subject: bpf, docs: Move handling of maps to Documentation/bpf/maps.rst +Patch-mainline: v5.17-rc1 +Git-commit: bc84e959e5aed4a79597d03e810fd1d7067b4ff7 +References: jsc#PED-1368 + +Move the general maps documentation into the maps.rst file from the +overall networking filter documentation and add a link instead. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Alexei Starovoitov +Acked-by: Song Liu +Link: https://lore.kernel.org/bpf/20211119163215.971383-5-hch@lst.de +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/maps.rst | 43 ++++++++++++++++++++++++++++++++ + Documentation/networking/filter.rst | 47 ++---------------------------------- + 2 files changed, 46 insertions(+), 44 deletions(-) + +--- a/Documentation/bpf/maps.rst ++++ b/Documentation/bpf/maps.rst +@@ -1,4 +1,47 @@ ++ ++========= ++eBPF maps + ========= ++ ++'maps' is a generic storage of different types for sharing data between kernel ++and userspace. 
++ ++The maps are accessed from user space via BPF syscall, which has commands: ++ ++- create a map with given type and attributes ++ ``map_fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)`` ++ using attr->map_type, attr->key_size, attr->value_size, attr->max_entries ++ returns process-local file descriptor or negative error ++ ++- lookup key in a given map ++ ``err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)`` ++ using attr->map_fd, attr->key, attr->value ++ returns zero and stores found elem into value or negative error ++ ++- create or update key/value pair in a given map ++ ``err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)`` ++ using attr->map_fd, attr->key, attr->value ++ returns zero or negative error ++ ++- find and delete element by key in a given map ++ ``err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)`` ++ using attr->map_fd, attr->key ++ ++- to delete map: close(fd) ++ Exiting process will delete maps automatically ++ ++userspace programs use this syscall to create/access maps that eBPF programs ++are concurrently updating. ++ ++maps can have different types: hash, array, bloom filter, radix-tree, etc. ++ ++The map is defined by: ++ ++ - type ++ - max number of elements ++ - key size in bytes ++ - value size in bytes ++ + Map Types + ========= + +--- a/Documentation/networking/filter.rst ++++ b/Documentation/networking/filter.rst +@@ -1223,9 +1223,9 @@ pointer type. The types of pointers des + Pointer to the value stored in a map element. + PTR_TO_MAP_VALUE_OR_NULL + Either a pointer to a map value, or NULL; map accesses +- (see section 'eBPF maps', below) return this type, +- which becomes a PTR_TO_MAP_VALUE when checked != NULL. +- Arithmetic on these pointers is forbidden. ++ (see maps.rst) return this type, which becomes a ++ a PTR_TO_MAP_VALUE when checked != NULL. Arithmetic on ++ these pointers is forbidden. + PTR_TO_STACK + Frame pointer. + PTR_TO_PACKET +@@ -1393,47 +1393,6 @@ using normal C code as:: + which makes such programs easier to write comparing to LD_ABS insn + and significantly faster. + +-eBPF maps +---------- +-'maps' is a generic storage of different types for sharing data between kernel +-and userspace. +- +-The maps are accessed from user space via BPF syscall, which has commands: +- +-- create a map with given type and attributes +- ``map_fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)`` +- using attr->map_type, attr->key_size, attr->value_size, attr->max_entries +- returns process-local file descriptor or negative error +- +-- lookup key in a given map +- ``err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)`` +- using attr->map_fd, attr->key, attr->value +- returns zero and stores found elem into value or negative error +- +-- create or update key/value pair in a given map +- ``err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)`` +- using attr->map_fd, attr->key, attr->value +- returns zero or negative error +- +-- find and delete element by key in a given map +- ``err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)`` +- using attr->map_fd, attr->key +- +-- to delete map: close(fd) +- Exiting process will delete maps automatically +- +-userspace programs use this syscall to create/access maps that eBPF programs +-are concurrently updating. +- +-maps can have different types: hash, array, bloom filter, radix-tree, etc. 
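
A minimal userspace sketch of the command sequence above, calling bpf(2)
directly since glibc has no wrapper (illustrative only; error handling
trimmed)::

    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/bpf.h>

    int main(void)
    {
        union bpf_attr attr;
        int key = 0;
        long long value = 42, out = 0;
        long fd, err;

        /* BPF_MAP_CREATE: type + sizes + capacity, returns an fd */
        memset(&attr, 0, sizeof(attr));
        attr.map_type    = BPF_MAP_TYPE_ARRAY;
        attr.key_size    = sizeof(key);
        attr.value_size  = sizeof(value);
        attr.max_entries = 1;
        fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
        if (fd < 0)
            return 1;

        /* BPF_MAP_UPDATE_ELEM: create/update the key/value pair */
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key    = (unsigned long)&key;
        attr.value  = (unsigned long)&value;
        err = syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));

        /* BPF_MAP_LOOKUP_ELEM: read the element back into 'out' */
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key    = (unsigned long)&key;
        attr.value  = (unsigned long)&out;
        err = syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));

        close(fd);  /* dropping the last reference deletes the map */
        return err ? 1 : 0;
    }
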
+- +-The map is defined by: +- +- - type +- - max number of elements +- - key size in bytes +- - value size in bytes +- + Pruning + ------- + The verifier does not actually walk all possible paths through the program. For diff --git a/patches.suse/bpf-docs-Move-the-packet-access-instructions-last-in.patch b/patches.suse/bpf-docs-Move-the-packet-access-instructions-last-in.patch new file mode 100644 index 0000000..f2509eb --- /dev/null +++ b/patches.suse/bpf-docs-Move-the-packet-access-instructions-last-in.patch @@ -0,0 +1,88 @@ +From: Christoph Hellwig +Date: Thu, 23 Dec 2021 11:19:06 +0100 +Subject: bpf, docs: Move the packet access instructions last in + instruction-set.rst +Patch-mainline: v5.17-rc1 +Git-commit: 63d000c3dc0a8ddd2f3778982ce6d19593656eda +References: jsc#PED-1368 + +The packet access instructions are a convoluted leftover from classic +BPF. Move them last past the much more important atomic operations, +and improve the rendering of the code example. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211223101906.977624-5-hch@lst.de +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/instruction-set.rst | 55 ++++++++++++++++------------------ + 1 file changed, 27 insertions(+), 28 deletions(-) + +--- a/Documentation/bpf/instruction-set.rst ++++ b/Documentation/bpf/instruction-set.rst +@@ -171,34 +171,6 @@ BPF_MEM | | BPF_LDX means:: + + Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. + +-Packet access instructions +--------------------------- +- +-eBPF has two non-generic instructions: (BPF_ABS | | BPF_LD) and +-(BPF_IND | | BPF_LD) which are used to access packet data. +- +-They had to be carried over from classic BPF to have strong performance of +-socket filters running in eBPF interpreter. These instructions can only +-be used when interpreter context is a pointer to ``struct sk_buff`` and +-have seven implicit operands. Register R6 is an implicit input that must +-contain pointer to sk_buff. Register R0 is an implicit output which contains +-the data fetched from the packet. Registers R1-R5 are scratch registers +-and must not be used to store the data across BPF_ABS | BPF_LD or +-BPF_IND | BPF_LD instructions. +- +-These instructions have implicit program exit condition as well. When +-eBPF program is trying to access the data beyond the packet boundary, +-the interpreter will abort the execution of the program. JIT compilers +-therefore must preserve this property. src_reg and imm32 fields are +-explicit inputs to these instructions. +- +-For example:: +- +- BPF_IND | BPF_W | BPF_LD means: +- +- R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32)) +- and R1 - R5 were scratched. +- + Atomic operations + ----------------- + +@@ -252,3 +224,30 @@ zero. + eBPF has one 16-byte instruction: ``BPF_LD | BPF_DW | BPF_IMM`` which consists + of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single + instruction that loads 64-bit immediate value into a dst_reg. ++ ++Packet access instructions ++-------------------------- ++ ++eBPF has two non-generic instructions: (BPF_ABS | | BPF_LD) and ++(BPF_IND | | BPF_LD) which are used to access packet data. ++ ++They had to be carried over from classic BPF to have strong performance of ++socket filters running in eBPF interpreter. These instructions can only ++be used when interpreter context is a pointer to ``struct sk_buff`` and ++have seven implicit operands. Register R6 is an implicit input that must ++contain pointer to sk_buff. 
Register R0 is an implicit output which contains ++the data fetched from the packet. Registers R1-R5 are scratch registers ++and must not be used to store the data across BPF_ABS | BPF_LD or ++BPF_IND | BPF_LD instructions. ++ ++These instructions have implicit program exit condition as well. When ++eBPF program is trying to access the data beyond the packet boundary, ++the interpreter will abort the execution of the program. JIT compilers ++therefore must preserve this property. src_reg and imm32 fields are ++explicit inputs to these instructions. ++ ++For example, BPF_IND | BPF_W | BPF_LD means:: ++ ++ R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32)) ++ ++and R1 - R5 are clobbered. diff --git a/patches.suse/bpf-docs-Prune-all-references-to-internal-BPF.patch b/patches.suse/bpf-docs-Prune-all-references-to-internal-BPF.patch new file mode 100644 index 0000000..056b7ae --- /dev/null +++ b/patches.suse/bpf-docs-Prune-all-references-to-internal-BPF.patch @@ -0,0 +1,181 @@ +From: Christoph Hellwig +Date: Fri, 19 Nov 2021 17:32:13 +0100 +Subject: bpf, docs: Prune all references to "internal BPF" +Patch-mainline: v5.17-rc1 +Git-commit: 06edc59c1fd7aababc8361655b20f4cc9870aef2 +References: jsc#PED-1368 + +The eBPF name has completely taken over from eBPF in general usage for +the actual eBPF representation, or BPF for any general in-kernel use. +Prune all remaining references to "internal BPF". + +Signed-off-by: Christoph Hellwig +Signed-off-by: Alexei Starovoitov +Acked-by: Song Liu +Link: https://lore.kernel.org/bpf/20211119163215.971383-4-hch@lst.de +Acked-by: Shung-Hsi Yu +--- + Documentation/networking/filter.rst | 22 +++++++++++----------- + arch/arm/net/bpf_jit_32.c | 2 +- + arch/arm64/net/bpf_jit_comp.c | 2 +- + arch/sparc/net/bpf_jit_comp_64.c | 2 +- + kernel/bpf/core.c | 2 +- + net/core/filter.c | 11 +++++------ + 6 files changed, 20 insertions(+), 21 deletions(-) + +--- a/Documentation/networking/filter.rst ++++ b/Documentation/networking/filter.rst +@@ -608,7 +608,7 @@ format with similar underlying principle + paragraphs is being used. However, the instruction set format is modelled + closer to the underlying architecture to mimic native instruction sets, so + that a better performance can be achieved (more details later). This new +-ISA is called 'eBPF' or 'internal BPF' interchangeably. (Note: eBPF which ++ISA is called 'eBPF'. (Note: eBPF which + originates from [e]xtended BPF is not the same as BPF extensions! While + eBPF is an ISA, BPF extensions date back to classic BPF's 'overloading' + of BPF_LD | BPF_{B,H,W} | BPF_ABS instruction.) +@@ -681,7 +681,7 @@ Some core changes of the new internal fo + That behavior maps directly to x86_64 and arm64 subregister definition, but + makes other JITs more difficult. + +- 32-bit architectures run 64-bit internal BPF programs via interpreter. ++ 32-bit architectures run 64-bit eBPF programs via interpreter. + Their JITs may convert BPF programs that only use 32-bit subregisters into + native instruction set and let the rest being interpreted. 
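
In C terms, the 32-bit subregister rule means a BPF_ALU (32-bit) operation
computes on the low halves and zero-extends the result into the full 64-bit
register, e.g. for BPF_ADD (a sketch, not kernel code)::

    #include <stdint.h>

    static inline void alu32_add(uint64_t *regs, int dst, int src)
    {
        /* compute in 32 bits; the assignment zero-extends to 64 bits */
        regs[dst] = (uint32_t)((uint32_t)regs[dst] + (uint32_t)regs[src]);
    }
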
+ +@@ -702,7 +702,7 @@ Some core changes of the new internal fo + - Introduces bpf_call insn and register passing convention for zero overhead + calls from/to other kernel functions: + +- Before an in-kernel function call, the internal BPF program needs to ++ Before an in-kernel function call, the eBPF program needs to + place function arguments into R1 to R5 registers to satisfy calling + convention, then the interpreter will take them from registers and pass + to in-kernel function. If R1 - R5 registers are mapped to CPU registers +@@ -771,7 +771,7 @@ Some core changes of the new internal fo + ... since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing + and rbx, r12 - r15 are callee saved. + +- Then the following internal BPF pseudo-program:: ++ Then the following eBPF pseudo-program:: + + bpf_mov R6, R1 /* save ctx */ + bpf_mov R2, 2 +@@ -837,7 +837,7 @@ Some core changes of the new internal fo + bpf_exit + + After the call the registers R1-R5 contain junk values and cannot be read. +- An in-kernel eBPF verifier is used to validate internal BPF programs. ++ An in-kernel eBPF verifier is used to validate eBPF programs. + + Also in the new design, eBPF is limited to 4096 insns, which means that any + program will terminate quickly and will only call a fixed number of kernel +@@ -852,23 +852,23 @@ A program, that is translated internally + + op:16, jt:8, jf:8, k:32 ==> op:8, dst_reg:4, src_reg:4, off:16, imm:32 + +-So far 87 internal BPF instructions were implemented. 8-bit 'op' opcode field ++So far 87 eBPF instructions were implemented. 8-bit 'op' opcode field + has room for new instructions. Some of them may use 16/24/32 byte encoding. New + instructions must be multiple of 8 bytes to preserve backward compatibility. + +-Internal BPF is a general purpose RISC instruction set. Not every register and ++eBPF is a general purpose RISC instruction set. Not every register and + every instruction are used during translation from original BPF to new format. + For example, socket filters are not using ``exclusive add`` instruction, but + tracing filters may do to maintain counters of events, for example. Register R9 + is not used by socket filters either, but more complex filters may be running + out of registers and would have to resort to spill/fill to stack. + +-Internal BPF can be used as a generic assembler for last step performance ++eBPF can be used as a generic assembler for last step performance + optimizations, socket filters and seccomp are using it as assembler. Tracing + filters may use it as assembler to generate code from kernel. In kernel usage +-may not be bounded by security considerations, since generated internal BPF code ++may not be bounded by security considerations, since generated eBPF code + may be optimizing internal code path and not being exposed to the user space. +-Safety of internal BPF can come from a verifier (TBD). In such use cases as ++Safety of eBPF can come from a verifier (TBD). In such use cases as + described, it may be used as safe instruction set. + + Just like the original BPF, the new format runs within a controlled environment, +@@ -1666,7 +1666,7 @@ Testing + ------- + + Next to the BPF toolchain, the kernel also ships a test module that contains +-various test cases for classic and internal BPF that can be executed against ++various test cases for classic and eBPF that can be executed against + the BPF interpreter and JIT compiler. 
It can be found in lib/test_bpf.c and + enabled via Kconfig:: + +--- a/arch/arm/net/bpf_jit_32.c ++++ b/arch/arm/net/bpf_jit_32.c +@@ -163,7 +163,7 @@ static const s8 bpf2a32[][2] = { + [BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)}, + /* Read only Frame Pointer to access Stack */ + [BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)}, +- /* Temporary Register for internal BPF JIT, can be used ++ /* Temporary Register for BPF JIT, can be used + * for constant blindings and others. + */ + [TMP_REG_1] = {ARM_R7, ARM_R6}, +--- a/arch/arm64/net/bpf_jit_comp.c ++++ b/arch/arm64/net/bpf_jit_comp.c +@@ -43,7 +43,7 @@ static const int bpf2a64[] = { + [BPF_REG_9] = A64_R(22), + /* read-only frame pointer to access stack */ + [BPF_REG_FP] = A64_R(25), +- /* temporary registers for internal BPF JIT */ ++ /* temporary registers for BPF JIT */ + [TMP_REG_1] = A64_R(10), + [TMP_REG_2] = A64_R(11), + [TMP_REG_3] = A64_R(12), +--- a/arch/sparc/net/bpf_jit_comp_64.c ++++ b/arch/sparc/net/bpf_jit_comp_64.c +@@ -227,7 +227,7 @@ static const int bpf2sparc[] = { + + [BPF_REG_AX] = G7, + +- /* temporary register for internal BPF JIT */ ++ /* temporary register for BPF JIT */ + [TMP_REG_1] = G1, + [TMP_REG_2] = G2, + [TMP_REG_3] = G3, +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -1892,7 +1892,7 @@ static void bpf_prog_select_func(struct + + /** + * bpf_prog_select_runtime - select exec runtime for BPF program +- * @fp: bpf_prog populated with internal BPF program ++ * @fp: bpf_prog populated with BPF program + * @err: pointer to error variable + * + * Try to JIT eBPF program, if JIT is not available, use interpreter. +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -1242,10 +1242,9 @@ static struct bpf_prog *bpf_migrate_filt + int err, new_len, old_len = fp->len; + bool seen_ld_abs = false; + +- /* We are free to overwrite insns et al right here as it +- * won't be used at this point in time anymore internally +- * after the migration to the internal BPF instruction +- * representation. ++ /* We are free to overwrite insns et al right here as it won't be used at ++ * this point in time anymore internally after the migration to the eBPF ++ * instruction representation. + */ + BUILD_BUG_ON(sizeof(struct sock_filter) != + sizeof(struct bpf_insn)); +@@ -1336,8 +1335,8 @@ static struct bpf_prog *bpf_prepare_filt + */ + bpf_jit_compile(fp); + +- /* JIT compiler couldn't process this filter, so do the +- * internal BPF translation for the optimized interpreter. ++ /* JIT compiler couldn't process this filter, so do the eBPF translation ++ * for the optimized interpreter. 
+ */ + if (!fp->jited) + fp = bpf_migrate_filter(fp); diff --git a/patches.suse/bpf-docs-Rename-bpf_lsm.rst-to-prog_lsm.rst.patch b/patches.suse/bpf-docs-Rename-bpf_lsm.rst-to-prog_lsm.rst.patch new file mode 100644 index 0000000..94db93e --- /dev/null +++ b/patches.suse/bpf-docs-Rename-bpf_lsm.rst-to-prog_lsm.rst.patch @@ -0,0 +1,326 @@ +From: Dave Tucker +Date: Fri, 12 Nov 2021 21:17:23 +0000 +Subject: bpf, docs: Rename bpf_lsm.rst to prog_lsm.rst +Patch-mainline: v5.17-rc1 +Git-commit: f5b1c2ef43d79e054f471dc96996ac40bb262d8d +References: jsc#PED-1368 + +This allows for documentation relating to BPF Program Types to be +matched by the glob pattern prog_* for inclusion in a sphinx toctree + +Signed-off-by: Dave Tucker +Signed-off-by: Daniel Borkmann +Acked-by: KP Singh +Link: https://lore.kernel.org/bpf/49fe0f370a2b28500c1b60f1fdb6fb7ec90de28a.1636749493.git.dave@dtucker.co.uk +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/{bpf_lsm.rst => prog_lsm.rst} | 0 + Documentation/bpf/bpf_lsm.rst | 143 ----------------------------------------- + Documentation/bpf/prog_lsm.rst | 143 +++++++++++++++++++++++++++++++++++++++++ + MAINTAINERS | 2 + 3 files changed, 144 insertions(+), 144 deletions(-) + rename Documentation/bpf/{bpf_lsm.rst => prog_lsm.rst} (100%) + +--- a/Documentation/bpf/bpf_lsm.rst ++++ /dev/null +@@ -1,143 +0,0 @@ +-.. SPDX-License-Identifier: GPL-2.0+ +-.. Copyright (C) 2020 Google LLC. +- +-================ +-LSM BPF Programs +-================ +- +-These BPF programs allow runtime instrumentation of the LSM hooks by privileged +-users to implement system-wide MAC (Mandatory Access Control) and Audit +-policies using eBPF. +- +-Structure +---------- +- +-The example shows an eBPF program that can be attached to the ``file_mprotect`` +-LSM hook: +- +-.. c:function:: int file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); +- +-Other LSM hooks which can be instrumented can be found in +-``include/linux/lsm_hooks.h``. +- +-eBPF programs that use Documentation/bpf/btf.rst do not need to include kernel +-headers for accessing information from the attached eBPF program's context. +-They can simply declare the structures in the eBPF program and only specify +-the fields that need to be accessed. +- +-.. code-block:: c +- +- struct mm_struct { +- unsigned long start_brk, brk, start_stack; +- } __attribute__((preserve_access_index)); +- +- struct vm_area_struct { +- unsigned long start_brk, brk, start_stack; +- unsigned long vm_start, vm_end; +- struct mm_struct *vm_mm; +- } __attribute__((preserve_access_index)); +- +- +-.. note:: The order of the fields is irrelevant. +- +-This can be further simplified (if one has access to the BTF information at +-build time) by generating the ``vmlinux.h`` with: +- +-.. code-block:: console +- +- # bpftool btf dump file format c > vmlinux.h +- +-.. note:: ``path-to-btf-vmlinux`` can be ``/sys/kernel/btf/vmlinux`` if the +- build environment matches the environment the BPF programs are +- deployed in. +- +-The ``vmlinux.h`` can then simply be included in the BPF programs without +-requiring the definition of the types. +- +-The eBPF programs can be declared using the``BPF_PROG`` +-macros defined in `tools/lib/bpf/bpf_tracing.h`_. In this +-example: +- +- * ``"lsm/file_mprotect"`` indicates the LSM hook that the program must +- be attached to +- * ``mprotect_audit`` is the name of the eBPF program +- +-.. 
code-block:: c +- +- SEC("lsm/file_mprotect") +- int BPF_PROG(mprotect_audit, struct vm_area_struct *vma, +- unsigned long reqprot, unsigned long prot, int ret) +- { +- /* ret is the return value from the previous BPF program +- * or 0 if it's the first hook. +- */ +- if (ret != 0) +- return ret; +- +- int is_heap; +- +- is_heap = (vma->vm_start >= vma->vm_mm->start_brk && +- vma->vm_end <= vma->vm_mm->brk); +- +- /* Return an -EPERM or write information to the perf events buffer +- * for auditing +- */ +- if (is_heap) +- return -EPERM; +- } +- +-The ``__attribute__((preserve_access_index))`` is a clang feature that allows +-the BPF verifier to update the offsets for the access at runtime using the +-Documentation/bpf/btf.rst information. Since the BPF verifier is aware of the +-types, it also validates all the accesses made to the various types in the +-eBPF program. +- +-Loading +-------- +- +-eBPF programs can be loaded with the :manpage:`bpf(2)` syscall's +-``BPF_PROG_LOAD`` operation: +- +-.. code-block:: c +- +- struct bpf_object *obj; +- +- obj = bpf_object__open("./my_prog.o"); +- bpf_object__load(obj); +- +-This can be simplified by using a skeleton header generated by ``bpftool``: +- +-.. code-block:: console +- +- # bpftool gen skeleton my_prog.o > my_prog.skel.h +- +-and the program can be loaded by including ``my_prog.skel.h`` and using +-the generated helper, ``my_prog__open_and_load``. +- +-Attachment to LSM Hooks +------------------------ +- +-The LSM allows attachment of eBPF programs as LSM hooks using :manpage:`bpf(2)` +-syscall's ``BPF_RAW_TRACEPOINT_OPEN`` operation or more simply by +-using the libbpf helper ``bpf_program__attach_lsm``. +- +-The program can be detached from the LSM hook by *destroying* the ``link`` +-link returned by ``bpf_program__attach_lsm`` using ``bpf_link__destroy``. +- +-One can also use the helpers generated in ``my_prog.skel.h`` i.e. +-``my_prog__attach`` for attachment and ``my_prog__destroy`` for cleaning up. +- +-Examples +--------- +- +-An example eBPF program can be found in +-`tools/testing/selftests/bpf/progs/lsm.c`_ and the corresponding +-userspace code in `tools/testing/selftests/bpf/prog_tests/test_lsm.c`_ +- +-.. Links +-.. _tools/lib/bpf/bpf_tracing.h: +- https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/lib/bpf/bpf_tracing.h +-.. _tools/testing/selftests/bpf/progs/lsm.c: +- https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/progs/lsm.c +-.. _tools/testing/selftests/bpf/prog_tests/test_lsm.c: +- https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/prog_tests/test_lsm.c +--- /dev/null ++++ b/Documentation/bpf/prog_lsm.rst +@@ -0,0 +1,143 @@ ++.. SPDX-License-Identifier: GPL-2.0+ ++.. Copyright (C) 2020 Google LLC. ++ ++================ ++LSM BPF Programs ++================ ++ ++These BPF programs allow runtime instrumentation of the LSM hooks by privileged ++users to implement system-wide MAC (Mandatory Access Control) and Audit ++policies using eBPF. ++ ++Structure ++--------- ++ ++The example shows an eBPF program that can be attached to the ``file_mprotect`` ++LSM hook: ++ ++.. c:function:: int file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); ++ ++Other LSM hooks which can be instrumented can be found in ++``include/linux/lsm_hooks.h``. 
++ ++eBPF programs that use Documentation/bpf/btf.rst do not need to include kernel ++headers for accessing information from the attached eBPF program's context. ++They can simply declare the structures in the eBPF program and only specify ++the fields that need to be accessed. ++ ++.. code-block:: c ++ ++ struct mm_struct { ++ unsigned long start_brk, brk, start_stack; ++ } __attribute__((preserve_access_index)); ++ ++ struct vm_area_struct { ++ unsigned long start_brk, brk, start_stack; ++ unsigned long vm_start, vm_end; ++ struct mm_struct *vm_mm; ++ } __attribute__((preserve_access_index)); ++ ++ ++.. note:: The order of the fields is irrelevant. ++ ++This can be further simplified (if one has access to the BTF information at ++build time) by generating the ``vmlinux.h`` with: ++ ++.. code-block:: console ++ ++ # bpftool btf dump file format c > vmlinux.h ++ ++.. note:: ``path-to-btf-vmlinux`` can be ``/sys/kernel/btf/vmlinux`` if the ++ build environment matches the environment the BPF programs are ++ deployed in. ++ ++The ``vmlinux.h`` can then simply be included in the BPF programs without ++requiring the definition of the types. ++ ++The eBPF programs can be declared using the``BPF_PROG`` ++macros defined in `tools/lib/bpf/bpf_tracing.h`_. In this ++example: ++ ++ * ``"lsm/file_mprotect"`` indicates the LSM hook that the program must ++ be attached to ++ * ``mprotect_audit`` is the name of the eBPF program ++ ++.. code-block:: c ++ ++ SEC("lsm/file_mprotect") ++ int BPF_PROG(mprotect_audit, struct vm_area_struct *vma, ++ unsigned long reqprot, unsigned long prot, int ret) ++ { ++ /* ret is the return value from the previous BPF program ++ * or 0 if it's the first hook. ++ */ ++ if (ret != 0) ++ return ret; ++ ++ int is_heap; ++ ++ is_heap = (vma->vm_start >= vma->vm_mm->start_brk && ++ vma->vm_end <= vma->vm_mm->brk); ++ ++ /* Return an -EPERM or write information to the perf events buffer ++ * for auditing ++ */ ++ if (is_heap) ++ return -EPERM; ++ } ++ ++The ``__attribute__((preserve_access_index))`` is a clang feature that allows ++the BPF verifier to update the offsets for the access at runtime using the ++Documentation/bpf/btf.rst information. Since the BPF verifier is aware of the ++types, it also validates all the accesses made to the various types in the ++eBPF program. ++ ++Loading ++------- ++ ++eBPF programs can be loaded with the :manpage:`bpf(2)` syscall's ++``BPF_PROG_LOAD`` operation: ++ ++.. code-block:: c ++ ++ struct bpf_object *obj; ++ ++ obj = bpf_object__open("./my_prog.o"); ++ bpf_object__load(obj); ++ ++This can be simplified by using a skeleton header generated by ``bpftool``: ++ ++.. code-block:: console ++ ++ # bpftool gen skeleton my_prog.o > my_prog.skel.h ++ ++and the program can be loaded by including ``my_prog.skel.h`` and using ++the generated helper, ``my_prog__open_and_load``. ++ ++Attachment to LSM Hooks ++----------------------- ++ ++The LSM allows attachment of eBPF programs as LSM hooks using :manpage:`bpf(2)` ++syscall's ``BPF_RAW_TRACEPOINT_OPEN`` operation or more simply by ++using the libbpf helper ``bpf_program__attach_lsm``. ++ ++The program can be detached from the LSM hook by *destroying* the ``link`` ++link returned by ``bpf_program__attach_lsm`` using ``bpf_link__destroy``. ++ ++One can also use the helpers generated in ``my_prog.skel.h`` i.e. ++``my_prog__attach`` for attachment and ``my_prog__destroy`` for cleaning up. 
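
Putting the generated helpers together, a minimal loader might look like this
(a sketch; ``my_prog`` follows the skeleton naming convention described above)::

    #include "my_prog.skel.h"

    int main(void)
    {
        struct my_prog *skel = my_prog__open_and_load();

        if (!skel)
            return 1;
        if (my_prog__attach(skel)) {    /* attach the LSM program */
            my_prog__destroy(skel);
            return 1;
        }
        /* ... program is live here ... */
        my_prog__destroy(skel);         /* detach and free everything */
        return 0;
    }
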
++ ++Examples ++-------- ++ ++An example eBPF program can be found in ++`tools/testing/selftests/bpf/progs/lsm.c`_ and the corresponding ++userspace code in `tools/testing/selftests/bpf/prog_tests/test_lsm.c`_ ++ ++.. Links ++.. _tools/lib/bpf/bpf_tracing.h: ++ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/lib/bpf/bpf_tracing.h ++.. _tools/testing/selftests/bpf/progs/lsm.c: ++ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/progs/lsm.c ++.. _tools/testing/selftests/bpf/prog_tests/test_lsm.c: ++ https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/prog_tests/test_lsm.c +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -3491,7 +3491,7 @@ R: Florent Revest + R: Brendan Jackman + L: bpf@vger.kernel.org + S: Maintained +-F: Documentation/bpf/bpf_lsm.rst ++F: Documentation/bpf/prog_lsm.rst + F: include/linux/bpf_lsm.h + F: kernel/bpf/bpf_lsm.c + F: security/bpf/ diff --git a/patches.suse/bpf-docs-Split-general-purpose-eBPF-documentation-ou.patch b/patches.suse/bpf-docs-Split-general-purpose-eBPF-documentation-ou.patch new file mode 100644 index 0000000..6cd9713 --- /dev/null +++ b/patches.suse/bpf-docs-Split-general-purpose-eBPF-documentation-ou.patch @@ -0,0 +1,2077 @@ +From: Christoph Hellwig +Date: Fri, 19 Nov 2021 17:32:15 +0100 +Subject: bpf, docs: Split general purpose eBPF documentation out of filter.rst +Patch-mainline: v5.17-rc1 +Git-commit: 88691e9e1ef59fa917b2bc2df47d550e7635e73c +References: jsc#PED-1368 + +filter.rst starts out documenting the classic BPF and then spills into +introducing and documentating eBPF. Move the eBPF documentation into +rwo new files under Documentation/bpf/ for the instruction set and +the verifier and link to the BPF documentation from filter.rst. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Alexei Starovoitov +Acked-by: Song Liu +Link: https://lore.kernel.org/bpf/20211119163215.971383-6-hch@lst.de +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/index.rst | 9 + Documentation/bpf/instruction-set.rst | 467 +++++++++++++++ + Documentation/bpf/verifier.rst | 529 ++++++++++++++++++ + Documentation/networking/filter.rst | 993 ---------------------------------- + 4 files changed, 1008 insertions(+), 990 deletions(-) + create mode 100644 Documentation/bpf/instruction-set.rst + create mode 100644 Documentation/bpf/verifier.rst + +--- a/Documentation/bpf/index.rst ++++ b/Documentation/bpf/index.rst +@@ -5,16 +5,15 @@ BPF Documentation + This directory contains documentation for the BPF (Berkeley Packet + Filter) facility, with a focus on the extended BPF version (eBPF). + +-This kernel side documentation is still work in progress. The main +-textual documentation is (for historical reasons) described in +-:ref:`networking-filter`, which describe both classical and extended +-BPF instruction-set. ++This kernel side documentation is still work in progress. + The Cilium project also maintains a `BPF and XDP Reference Guide`_ + that goes into great technical depth about the BPF Architecture. + + .. toctree:: + :maxdepth: 1 + ++ instruction-set ++ verifier + libbpf/index + btf + faq +@@ -34,4 +33,4 @@ that goes into great technical depth abo + * :ref:`genindex` + + .. Links: +-.. _BPF and XDP Reference Guide: https://docs.cilium.io/en/latest/bpf/ +\ No newline at end of file ++.. 
_BPF and XDP Reference Guide: https://docs.cilium.io/en/latest/bpf/ +--- /dev/null ++++ b/Documentation/bpf/instruction-set.rst +@@ -0,0 +1,467 @@ ++ ++==================== ++eBPF Instruction Set ++==================== ++ ++eBPF is designed to be JITed with one to one mapping, which can also open up ++the possibility for GCC/LLVM compilers to generate optimized eBPF code through ++an eBPF backend that performs almost as fast as natively compiled code. ++ ++Some core changes of the eBPF format from classic BPF: ++ ++- Number of registers increase from 2 to 10: ++ ++ The old format had two registers A and X, and a hidden frame pointer. The ++ new layout extends this to be 10 internal registers and a read-only frame ++ pointer. Since 64-bit CPUs are passing arguments to functions via registers ++ the number of args from eBPF program to in-kernel function is restricted ++ to 5 and one register is used to accept return value from an in-kernel ++ function. Natively, x86_64 passes first 6 arguments in registers, aarch64/ ++ sparcv9/mips64 have 7 - 8 registers for arguments; x86_64 has 6 callee saved ++ registers, and aarch64/sparcv9/mips64 have 11 or more callee saved registers. ++ ++ Therefore, eBPF calling convention is defined as: ++ ++ * R0 - return value from in-kernel function, and exit value for eBPF program ++ * R1 - R5 - arguments from eBPF program to in-kernel function ++ * R6 - R9 - callee saved registers that in-kernel function will preserve ++ * R10 - read-only frame pointer to access stack ++ ++ Thus, all eBPF registers map one to one to HW registers on x86_64, aarch64, ++ etc, and eBPF calling convention maps directly to ABIs used by the kernel on ++ 64-bit architectures. ++ ++ On 32-bit architectures JIT may map programs that use only 32-bit arithmetic ++ and may let more complex programs to be interpreted. ++ ++ R0 - R5 are scratch registers and eBPF program needs spill/fill them if ++ necessary across calls. Note that there is only one eBPF program (== one ++ eBPF main routine) and it cannot call other eBPF functions, it can only ++ call predefined in-kernel functions, though. ++ ++- Register width increases from 32-bit to 64-bit: ++ ++ Still, the semantics of the original 32-bit ALU operations are preserved ++ via 32-bit subregisters. All eBPF registers are 64-bit with 32-bit lower ++ subregisters that zero-extend into 64-bit if they are being written to. ++ That behavior maps directly to x86_64 and arm64 subregister definition, but ++ makes other JITs more difficult. ++ ++ 32-bit architectures run 64-bit eBPF programs via interpreter. ++ Their JITs may convert BPF programs that only use 32-bit subregisters into ++ native instruction set and let the rest being interpreted. ++ ++ Operation is 64-bit, because on 64-bit architectures, pointers are also ++ 64-bit wide, and we want to pass 64-bit values in/out of kernel functions, ++ so 32-bit eBPF registers would otherwise require to define register-pair ++ ABI, thus, there won't be able to use a direct eBPF register to HW register ++ mapping and JIT would need to do combine/split/move operations for every ++ register in and out of the function, which is complex, bug prone and slow. ++ Another reason is the use of atomic 64-bit counters. ++ ++- Conditional jt/jf targets replaced with jt/fall-through: ++ ++ While the original design has constructs such as ``if (cond) jump_true; ++ else jump_false;``, they are being replaced into alternative constructs like ++ ``if (cond) jump_true; /* else fall-through */``. 
++ ++- Introduces bpf_call insn and register passing convention for zero overhead ++ calls from/to other kernel functions: ++ ++ Before an in-kernel function call, the eBPF program needs to ++ place function arguments into R1 to R5 registers to satisfy calling ++ convention, then the interpreter will take them from registers and pass ++ to in-kernel function. If R1 - R5 registers are mapped to CPU registers ++ that are used for argument passing on given architecture, the JIT compiler ++ doesn't need to emit extra moves. Function arguments will be in the correct ++ registers and BPF_CALL instruction will be JITed as single 'call' HW ++ instruction. This calling convention was picked to cover common call ++ situations without performance penalty. ++ ++ After an in-kernel function call, R1 - R5 are reset to unreadable and R0 has ++ a return value of the function. Since R6 - R9 are callee saved, their state ++ is preserved across the call. ++ ++ For example, consider three C functions:: ++ ++ u64 f1() { return (*_f2)(1); } ++ u64 f2(u64 a) { return f3(a + 1, a); } ++ u64 f3(u64 a, u64 b) { return a - b; } ++ ++ GCC can compile f1, f3 into x86_64:: ++ ++ f1: ++ movl $1, %edi ++ movq _f2(%rip), %rax ++ jmp *%rax ++ f3: ++ movq %rdi, %rax ++ subq %rsi, %rax ++ ret ++ ++ Function f2 in eBPF may look like:: ++ ++ f2: ++ bpf_mov R2, R1 ++ bpf_add R1, 1 ++ bpf_call f3 ++ bpf_exit ++ ++ If f2 is JITed and the pointer stored to ``_f2``. The calls f1 -> f2 -> f3 and ++ returns will be seamless. Without JIT, __bpf_prog_run() interpreter needs to ++ be used to call into f2. ++ ++ For practical reasons all eBPF programs have only one argument 'ctx' which is ++ already placed into R1 (e.g. on __bpf_prog_run() startup) and the programs ++ can call kernel functions with up to 5 arguments. Calls with 6 or more arguments ++ are currently not supported, but these restrictions can be lifted if necessary ++ in the future. ++ ++ On 64-bit architectures all register map to HW registers one to one. For ++ example, x86_64 JIT compiler can map them as ... ++ ++ :: ++ ++ R0 - rax ++ R1 - rdi ++ R2 - rsi ++ R3 - rdx ++ R4 - rcx ++ R5 - r8 ++ R6 - rbx ++ R7 - r13 ++ R8 - r14 ++ R9 - r15 ++ R10 - rbp ++ ++ ... since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing ++ and rbx, r12 - r15 are callee saved. ++ ++ Then the following eBPF pseudo-program:: ++ ++ bpf_mov R6, R1 /* save ctx */ ++ bpf_mov R2, 2 ++ bpf_mov R3, 3 ++ bpf_mov R4, 4 ++ bpf_mov R5, 5 ++ bpf_call foo ++ bpf_mov R7, R0 /* save foo() return value */ ++ bpf_mov R1, R6 /* restore ctx for next call */ ++ bpf_mov R2, 6 ++ bpf_mov R3, 7 ++ bpf_mov R4, 8 ++ bpf_mov R5, 9 ++ bpf_call bar ++ bpf_add R0, R7 ++ bpf_exit ++ ++ After JIT to x86_64 may look like:: ++ ++ push %rbp ++ mov %rsp,%rbp ++ sub $0x228,%rsp ++ mov %rbx,-0x228(%rbp) ++ mov %r13,-0x220(%rbp) ++ mov %rdi,%rbx ++ mov $0x2,%esi ++ mov $0x3,%edx ++ mov $0x4,%ecx ++ mov $0x5,%r8d ++ callq foo ++ mov %rax,%r13 ++ mov %rbx,%rdi ++ mov $0x6,%esi ++ mov $0x7,%edx ++ mov $0x8,%ecx ++ mov $0x9,%r8d ++ callq bar ++ add %r13,%rax ++ mov -0x228(%rbp),%rbx ++ mov -0x220(%rbp),%r13 ++ leaveq ++ retq ++ ++ Which is in this example equivalent in C to:: ++ ++ u64 bpf_filter(u64 ctx) ++ { ++ return foo(ctx, 2, 3, 4, 5) + bar(ctx, 6, 7, 8, 9); ++ } ++ ++ In-kernel functions foo() and bar() with prototype: u64 (*)(u64 arg1, u64 ++ arg2, u64 arg3, u64 arg4, u64 arg5); will receive arguments in proper ++ registers and place their return value into ``%rax`` which is R0 in eBPF. 
++ Prologue and epilogue are emitted by JIT and are implicit in the ++ interpreter. R0-R5 are scratch registers, so eBPF program needs to preserve ++ them across the calls as defined by calling convention. ++ ++ For example the following program is invalid:: ++ ++ bpf_mov R1, 1 ++ bpf_call foo ++ bpf_mov R0, R1 ++ bpf_exit ++ ++ After the call the registers R1-R5 contain junk values and cannot be read. ++ An in-kernel `eBPF verifier`_ is used to validate eBPF programs. ++ ++Also in the new design, eBPF is limited to 4096 insns, which means that any ++program will terminate quickly and will only call a fixed number of kernel ++functions. Original BPF and eBPF are two operand instructions, ++which helps to do one-to-one mapping between eBPF insn and x86 insn during JIT. ++ ++The input context pointer for invoking the interpreter function is generic, ++its content is defined by a specific use case. For seccomp register R1 points ++to seccomp_data, for converted BPF filters R1 points to a skb. ++ ++A program, that is translated internally consists of the following elements:: ++ ++ op:16, jt:8, jf:8, k:32 ==> op:8, dst_reg:4, src_reg:4, off:16, imm:32 ++ ++So far 87 eBPF instructions were implemented. 8-bit 'op' opcode field ++has room for new instructions. Some of them may use 16/24/32 byte encoding. New ++instructions must be multiple of 8 bytes to preserve backward compatibility. ++ ++eBPF is a general purpose RISC instruction set. Not every register and ++every instruction are used during translation from original BPF to eBPF. ++For example, socket filters are not using ``exclusive add`` instruction, but ++tracing filters may do to maintain counters of events, for example. Register R9 ++is not used by socket filters either, but more complex filters may be running ++out of registers and would have to resort to spill/fill to stack. ++ ++eBPF can be used as a generic assembler for last step performance ++optimizations, socket filters and seccomp are using it as assembler. Tracing ++filters may use it as assembler to generate code from kernel. In kernel usage ++may not be bounded by security considerations, since generated eBPF code ++may be optimizing internal code path and not being exposed to the user space. ++Safety of eBPF can come from the `eBPF verifier`_. In such use cases as ++described, it may be used as safe instruction set. ++ ++Just like the original BPF, eBPF runs within a controlled environment, ++is deterministic and the kernel can easily prove that. The safety of the program ++can be determined in two steps: first step does depth-first-search to disallow ++loops and other CFG validation; second step starts from the first insn and ++descends all possible paths. It simulates execution of every insn and observes ++the state change of registers and stack. ++ ++eBPF opcode encoding ++==================== ++ ++eBPF is reusing most of the opcode encoding from classic to simplify conversion ++of classic BPF to eBPF. 
++the 8-bit 'code' field is divided into three parts::
++
++  +----------------+--------+--------------------+
++  |   4 bits       |  1 bit |   3 bits           |
++  | operation code | source | instruction class  |
++  +----------------+--------+--------------------+
++  (MSB)                                      (LSB)
++
++The three LSB bits store the instruction class, which is one of:
++
++  ===================     ===============
++  Classic BPF classes     eBPF classes
++  ===================     ===============
++  BPF_LD    0x00          BPF_LD    0x00
++  BPF_LDX   0x01          BPF_LDX   0x01
++  BPF_ST    0x02          BPF_ST    0x02
++  BPF_STX   0x03          BPF_STX   0x03
++  BPF_ALU   0x04          BPF_ALU   0x04
++  BPF_JMP   0x05          BPF_JMP   0x05
++  BPF_RET   0x06          BPF_JMP32 0x06
++  BPF_MISC  0x07          BPF_ALU64 0x07
++  ===================     ===============
++
++When BPF_CLASS(code) == BPF_ALU or BPF_JMP, the 4th bit encodes the source
++operand ...
++
++  ::
++
++    BPF_K     0x00
++    BPF_X     0x08
++
++  * in classic BPF, this means::
++
++	BPF_SRC(code) == BPF_X - use register X as source operand
++	BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand
++
++  * in eBPF, this means::
++
++	BPF_SRC(code) == BPF_X - use 'src_reg' register as source operand
++	BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand
++
++... and the four MSB bits store the operation code.
++
++If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of::
++
++  BPF_ADD   0x00
++  BPF_SUB   0x10
++  BPF_MUL   0x20
++  BPF_DIV   0x30
++  BPF_OR    0x40
++  BPF_AND   0x50
++  BPF_LSH   0x60
++  BPF_RSH   0x70
++  BPF_NEG   0x80
++  BPF_MOD   0x90
++  BPF_XOR   0xa0
++  BPF_MOV   0xb0  /* eBPF only: mov reg to reg */
++  BPF_ARSH  0xc0  /* eBPF only: sign extending shift right */
++  BPF_END   0xd0  /* eBPF only: endianness conversion */
++
++If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of::
++
++  BPF_JA    0x00  /* BPF_JMP only */
++  BPF_JEQ   0x10
++  BPF_JGT   0x20
++  BPF_JGE   0x30
++  BPF_JSET  0x40
++  BPF_JNE   0x50  /* eBPF only: jump != */
++  BPF_JSGT  0x60  /* eBPF only: signed '>' */
++  BPF_JSGE  0x70  /* eBPF only: signed '>=' */
++  BPF_CALL  0x80  /* eBPF BPF_JMP only: function call */
++  BPF_EXIT  0x90  /* eBPF BPF_JMP only: function return */
++  BPF_JLT   0xa0  /* eBPF only: unsigned '<' */
++  BPF_JLE   0xb0  /* eBPF only: unsigned '<=' */
++  BPF_JSLT  0xc0  /* eBPF only: signed '<' */
++  BPF_JSLE  0xd0  /* eBPF only: signed '<=' */
++
++So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF
++and eBPF. There are only two registers in classic BPF, so it means A += X.
++In eBPF it means dst_reg = (u32) dst_reg + (u32) src_reg; similarly,
++BPF_XOR | BPF_K | BPF_ALU means A ^= imm32 in classic BPF and the analogous
++dst_reg = (u32) dst_reg ^ (u32) imm32 in eBPF.
++
++Classic BPF uses the BPF_MISC class to represent A = X and X = A moves.
++eBPF uses the BPF_MOV | BPF_X | BPF_ALU code instead. Since there are no
++BPF_MISC operations in eBPF, class 7 is used as BPF_ALU64 to mean
++exactly the same operations as BPF_ALU, but with 64-bit wide operands
++instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.:
++dst_reg = dst_reg + src_reg.
++
++Classic BPF wastes the whole BPF_RET class to represent a single ``ret``
++operation. Classic BPF_RET | BPF_K means copy imm32 into the return register
++and perform function exit. eBPF is modeled to match the CPU, so
++BPF_JMP | BPF_EXIT in eBPF means function exit only. The eBPF program needs
++to store the return value into register R0 before doing a BPF_EXIT. Class 6
++in eBPF is used as BPF_JMP32 to mean exactly the same operations as BPF_JMP,
++but with 32-bit wide operands for the comparisons instead.
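++
++As a concrete illustration of this layout, the following user-space C sketch
++decodes an opcode with the same bit masks; the struct and macros mirror
++include/uapi/linux/bpf.h and include/uapi/linux/bpf_common.h, while the
++small main() is only a hypothetical demo, not part of any kernel API::
++
++  #include <stdint.h>
++  #include <stdio.h>
++
++  /* Mirrors struct bpf_insn from include/uapi/linux/bpf.h */
++  struct bpf_insn {
++          uint8_t code;       /* opcode */
++          uint8_t dst_reg:4;  /* dest register */
++          uint8_t src_reg:4;  /* source register */
++          int16_t off;        /* signed offset */
++          int32_t imm;        /* signed immediate constant */
++  };
++
++  /* Mirrors the accessor macros in include/uapi/linux/bpf_common.h */
++  #define BPF_CLASS(code) ((code) & 0x07)  /* three LSB bits */
++  #define BPF_SRC(code)   ((code) & 0x08)  /* 4th bit: BPF_K or BPF_X */
++  #define BPF_OP(code)    ((code) & 0xf0)  /* four MSB bits */
++
++  int main(void)
++  {
++          uint8_t code = 0x0f;  /* BPF_ADD | BPF_X | BPF_ALU64 */
++
++          /* prints: class=0x7 src=0x8 op=0x0 */
++          printf("class=0x%x src=0x%x op=0x%x\n",
++                 BPF_CLASS(code), BPF_SRC(code), BPF_OP(code));
++          return 0;
++  }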
++
++For load and store instructions the 8-bit 'code' field is divided as::
++
++  +--------+--------+-------------------+
++  | 3 bits | 2 bits |   3 bits          |
++  |  mode  |  size  | instruction class |
++  +--------+--------+-------------------+
++  (MSB)                             (LSB)
++
++Size modifier is one of ...
++
++::
++
++  BPF_W   0x00    /* word */
++  BPF_H   0x08    /* half word */
++  BPF_B   0x10    /* byte */
++  BPF_DW  0x18    /* eBPF only, double word */
++
++... which encodes the size of the load/store operation::
++
++ B  - 1 byte
++ H  - 2 byte
++ W  - 4 byte
++ DW - 8 byte (eBPF only)
++
++Mode modifier is one of::
++
++  BPF_IMM     0x00  /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
++  BPF_ABS     0x20
++  BPF_IND     0x40
++  BPF_MEM     0x60
++  BPF_LEN     0x80  /* classic BPF only, reserved in eBPF */
++  BPF_MSH     0xa0  /* classic BPF only, reserved in eBPF */
++  BPF_ATOMIC  0xc0  /* eBPF only, atomic operations */
++
++eBPF has two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and
++(BPF_IND | <size> | BPF_LD) which are used to access packet data.
++
++They had to be carried over from classic BPF to retain strong performance of
++socket filters running in the eBPF interpreter. These instructions can only
++be used when the interpreter context is a pointer to ``struct sk_buff`` and
++have seven implicit operands. Register R6 is an implicit input that must
++contain a pointer to the sk_buff. Register R0 is an implicit output which
++contains the data fetched from the packet. Registers R1-R5 are scratch
++registers and must not be used to store the data across BPF_ABS | BPF_LD or
++BPF_IND | BPF_LD instructions.
++
++These instructions have an implicit program exit condition as well. When an
++eBPF program tries to access the data beyond the packet boundary, the
++interpreter will abort the execution of the program. JIT compilers therefore
++must preserve this property. src_reg and imm32 fields are explicit inputs to
++these instructions.
++
++For example::
++
++  BPF_IND | BPF_W | BPF_LD means:
++
++    R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32))
++    and R1 - R5 were scratched.
++
++Unlike the classic BPF instruction set, eBPF has generic load/store
++operations::
++
++  BPF_MEM | <size> | BPF_STX:  *(size *) (dst_reg + off) = src_reg
++  BPF_MEM | <size> | BPF_ST:   *(size *) (dst_reg + off) = imm32
++  BPF_MEM | <size> | BPF_LDX:  dst_reg = *(size *) (src_reg + off)
++
++Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW.
++
++It also includes atomic operations, which use the immediate field for extra
++encoding::
++
++  .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W  | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg
++  .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg
++
++The basic atomic operations supported are::
++
++  BPF_ADD
++  BPF_AND
++  BPF_OR
++  BPF_XOR
++
++Each has the same semantics as the ``BPF_ADD`` example, that is: the memory
++location addressed by ``dst_reg + off`` is atomically modified, with
++``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the
++immediate, then these operations also overwrite ``src_reg`` with the
++value that was in memory before it was modified.
++
++The more special operations are::
++
++  BPF_XCHG
++
++This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg +
++off``. ::
++
++  BPF_CMPXCHG
++
++This atomically compares the value addressed by ``dst_reg + off`` with
++``R0``. If they match, it is replaced with ``src_reg``. In either case, the
++value that was there before is zero-extended and loaded back to ``R0``.
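++
++For a sense of how these opcodes are reached from C, the fragment below is a
++minimal sketch of BPF C source (the global name is illustrative only): LLVM's
++BPF backend typically lowers the __sync_fetch_and_add() builtin to
++BPF_ATOMIC | BPF_ADD, and emits the BPF_FETCH variant when the result is
++actually used::
++
++  /* illustrative global counter in a BPF C program */
++  static unsigned long hits;
++
++  static void count(void)
++  {
++          /* result discarded: typically lowered to BPF_ATOMIC | BPF_ADD,
++           * the classic 'lock xadd' form */
++          __sync_fetch_and_add(&hits, 1);
++  }
++
++  static unsigned long count_fetch(void)
++  {
++          /* result used: typically lowered to
++           * BPF_ATOMIC | BPF_ADD | BPF_FETCH (see the -mcpu note below) */
++          return __sync_fetch_and_add(&hits, 1);
++  }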
++
++Note that 1 and 2 byte atomic operations are not supported.
++
++Clang can generate atomic instructions by default when ``-mcpu=v3`` is
++enabled. If a lower version for ``-mcpu`` is set, the only atomic instruction
++Clang can generate is ``BPF_ADD`` *without* ``BPF_FETCH``. If you need to
++enable the atomics features while keeping a lower ``-mcpu`` version, you can
++use ``-Xclang -target-feature -Xclang +alu32``.
++
++You may encounter ``BPF_XADD`` - this is a legacy name for ``BPF_ATOMIC``,
++referring to the exclusive-add operation encoded when the immediate field is
++zero.
++
++eBPF has one 16-byte instruction: ``BPF_LD | BPF_DW | BPF_IMM`` which consists
++of two consecutive ``struct bpf_insn`` 8-byte blocks and is interpreted as a
++single instruction that loads a 64-bit immediate value into dst_reg.
++Classic BPF has a similar instruction: ``BPF_LD | BPF_W | BPF_IMM`` which
++loads a 32-bit immediate value into a register.
++
++.. Links:
++.. _eBPF verifier: verifier.rst
+--- /dev/null
++++ b/Documentation/bpf/verifier.rst
+@@ -0,0 +1,529 @@
++
++=============
++eBPF verifier
++=============
++
++The safety of the eBPF program is determined in two steps.
++
++The first step does a DAG check to disallow loops and performs other CFG
++validation. In particular it will detect programs that have unreachable
++instructions (though the classic BPF checker allows them).
++
++The second step starts from the first insn and descends all possible paths.
++It simulates execution of every insn and observes the state change of
++registers and stack.
++
++At the start of the program the register R1 contains a pointer to context
++and has type PTR_TO_CTX.
++If the verifier sees an insn that does R2=R1, then R2 now has type
++PTR_TO_CTX as well and can be used on the right hand side of an expression.
++If R1=PTR_TO_CTX and the insn is R2=R1+R1, then R2=SCALAR_VALUE,
++since addition of two valid pointers makes an invalid pointer.
++(In 'secure' mode the verifier will reject any type of pointer arithmetic to
++make sure that kernel addresses don't leak to unprivileged users.)
++
++If a register was never written to, it's not readable::
++
++  bpf_mov R0 = R2
++  bpf_exit
++
++will be rejected, since R2 is unreadable at the start of the program.
++
++After a kernel function call, R1-R5 are reset to unreadable and
++R0 has the return type of the function.
++
++Since R6-R9 are callee saved, their state is preserved across the call.
++
++::
++
++  bpf_mov R6 = 1
++  bpf_call foo
++  bpf_mov R0 = R6
++  bpf_exit
++
++is a correct program. If there was R1 instead of R6, it would have
++been rejected.
++
++Load/store instructions are allowed only with registers of valid types, which
++are PTR_TO_CTX, PTR_TO_MAP, PTR_TO_STACK. They are bounds and alignment
++checked. For example::
++
++  bpf_mov R1 = 1
++  bpf_mov R2 = 2
++  bpf_xadd *(u32 *)(R1 + 3) += R2
++  bpf_exit
++
++will be rejected, since R1 doesn't have a valid pointer type at the time of
++execution of the instruction bpf_xadd.
++
++At the start the R1 type is PTR_TO_CTX (a pointer to the generic ``struct
++bpf_context``). A callback is used to customize the verifier to restrict
++eBPF program access to only certain fields within the ctx structure with
++specified size and alignment.
++
++For example, the following insn::
++
++  bpf_ld R0 = *(u32 *)(R6 + 8)
++
++intends to load a word from address R6 + 8 and store it into R0.
++If R6=PTR_TO_CTX, then via the is_valid_access() callback the verifier will
++know that offset 8 of size 4 bytes can be accessed for reading, otherwise
++the verifier will reject the program.
++If R6=PTR_TO_STACK, then the access should be aligned and be within
++stack bounds, which are [-MAX_BPF_STACK, 0). In this example the offset is 8,
++so it will fail verification, since it's out of bounds.
++
++The verifier will allow an eBPF program to read data from the stack only
++after it has written into it.
++
++The classic BPF verifier does a similar check with the M[0-15] memory slots.
++For example::
++
++  bpf_ld R0 = *(u32 *)(R10 - 4)
++  bpf_exit
++
++is an invalid program.
++Though R10 is a correct read-only register and has type PTR_TO_STACK
++and R10 - 4 is within stack bounds, there were no stores into that location.
++
++Pointer register spill/fill is tracked as well, since four (R6-R9)
++callee saved registers may not be enough for some programs.
++
++Allowed function calls are customized with bpf_verifier_ops->get_func_proto().
++The eBPF verifier will check that registers match argument constraints.
++After the call, register R0 will be set to the return type of the function.
++
++Function calls are the main mechanism to extend the functionality of eBPF
++programs. Socket filters may let programs call one set of functions, whereas
++tracing filters may allow a completely different set.
++
++If a function is made accessible to eBPF programs, it needs to be thought
++through from a safety point of view. The verifier will guarantee that the
++function is called with valid arguments.
++
++seccomp vs socket filters have different security restrictions for classic
++BPF. Seccomp solves this with a two-stage verifier: the classic BPF verifier
++is followed by the seccomp verifier. In the case of eBPF, one configurable
++verifier is shared for all use cases.
++
++See the details of the eBPF verifier in kernel/bpf/verifier.c.
++
++Register value tracking
++=======================
++
++In order to determine the safety of an eBPF program, the verifier must track
++the range of possible values in each register and also in each stack slot.
++This is done with ``struct bpf_reg_state``, defined in
++include/linux/bpf_verifier.h, which unifies tracking of scalar and pointer
++values. Each register state has a type, which is either NOT_INIT (the
++register has not been written to), SCALAR_VALUE (some value which is not
++usable as a pointer), or a pointer type. The types of pointers describe
++their base, as follows:
++
++    PTR_TO_CTX
++        Pointer to bpf_context.
++    CONST_PTR_TO_MAP
++        Pointer to struct bpf_map. "Const" because arithmetic
++        on these pointers is forbidden.
++    PTR_TO_MAP_VALUE
++        Pointer to the value stored in a map element.
++    PTR_TO_MAP_VALUE_OR_NULL
++        Either a pointer to a map value, or NULL; map accesses
++        (see maps.rst) return this type, which becomes a
++        PTR_TO_MAP_VALUE when checked != NULL. Arithmetic on
++        these pointers is forbidden.
++    PTR_TO_STACK
++        Frame pointer.
++    PTR_TO_PACKET
++        skb->data.
++    PTR_TO_PACKET_END
++        skb->data + headlen; arithmetic forbidden.
++    PTR_TO_SOCKET
++        Pointer to struct bpf_sock_ops, implicitly refcounted.
++    PTR_TO_SOCKET_OR_NULL
++        Either a pointer to a socket, or NULL; socket lookup
++        returns this type, which becomes a PTR_TO_SOCKET when
++        checked != NULL. PTR_TO_SOCKET is reference-counted,
++        so programs must release the reference through the
++        socket release function before the end of the program.
++        Arithmetic on these pointers is forbidden.
++
++However, a pointer may be offset from this base (as a result of pointer
++arithmetic), and this is tracked in two parts: the 'fixed offset' and
++'variable offset'. The former is used when an exactly-known value (e.g. an
++immediate operand) is added to a pointer, while the latter is used for values
++which are not exactly known. The variable offset is also used in
++SCALAR_VALUEs, to track the range of possible values in the register.
++
++The verifier's knowledge about the variable offset consists of:
++
++* minimum and maximum values as unsigned
++* minimum and maximum values as signed
++
++* knowledge of the values of individual bits, in the form of a 'tnum': a u64
++  'mask' and a u64 'value'. 1s in the mask represent bits whose value is
++  unknown; 1s in the value represent bits known to be 1. Bits known to be 0
++  have 0 in both mask and value; no bit should ever be 1 in both. For
++  example, if a byte is read into a register from memory, the register's top
++  56 bits are known zero, while the low 8 are unknown - which is represented
++  as the tnum (0x0; 0xff). If we then OR this with 0x40, we get (0x40; 0xbf),
++  then if we add 1 we get (0x0; 0x1ff), because of potential carries.
++
++Besides arithmetic, the register state can also be updated by conditional
++branches. For instance, if a SCALAR_VALUE is compared > 8, in the 'true'
++branch it will have a umin_value (unsigned minimum value) of 9, whereas in
++the 'false' branch it will have a umax_value of 8. A signed compare (with
++BPF_JSGT or BPF_JSGE) would instead update the signed minimum/maximum
++values. Information from the signed and unsigned bounds can be combined; for
++instance, if a value is first tested < 8 and then tested s> 4, the verifier
++will conclude that the value is also > 4 and s< 8, since the bounds prevent
++crossing the sign boundary.
++
++PTR_TO_PACKETs with a variable offset part have an 'id', which is common to
++all pointers sharing that same variable offset. This is important for packet
++range checks: after adding a variable to a packet pointer register A, if you
++then copy it to another register B and then add a constant 4 to A, both
++registers will share the same 'id' but A will have a fixed offset of +4.
++Then if A is bounds-checked and found to be less than a PTR_TO_PACKET_END,
++register B is now known to have a safe range of at least 4 bytes. See
++'Direct packet access', below, for more on PTR_TO_PACKET ranges.
++
++The 'id' field is also used on PTR_TO_MAP_VALUE_OR_NULL, common to all
++copies of the pointer returned from a map lookup. This means that when one
++copy is checked and found to be non-NULL, all copies can become
++PTR_TO_MAP_VALUEs.
++As well as range-checking, the tracked information is also used for
++enforcing alignment of pointer accesses. For instance, on most systems the
++packet pointer is 2 bytes after a 4-byte alignment. If a program adds 14
++bytes to that to jump over the Ethernet header, then reads IHL and adds
++(IHL * 4), the resulting pointer will have a variable offset known to be
++4n+2 for some n, so adding the 2 bytes (NET_IP_ALIGN) gives a 4-byte
++alignment and so word-sized accesses through that pointer are safe.
++The 'id' field is also used on PTR_TO_SOCKET and PTR_TO_SOCKET_OR_NULL,
++common to all copies of the pointer returned from a socket lookup. This has
++similar behaviour to the handling for
++PTR_TO_MAP_VALUE_OR_NULL->PTR_TO_MAP_VALUE, but it also handles reference
++tracking for the pointer. PTR_TO_SOCKET implicitly represents a reference to
++the corresponding ``struct sock``. To ensure that the reference is not
++leaked, it is imperative to NULL-check the reference and, in the non-NULL
++case, pass the valid reference to the socket release function.
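++
++In BPF C terms, the reference rules above look roughly as follows. This is a
++minimal sketch assuming a tc (cls_bpf) program with ctx ``skb`` and the usual
++libbpf helper declarations; the tuple setup is elided::
++
++  struct bpf_sock_tuple tuple = {};
++  struct bpf_sock *sk;
++
++  /* ... fill tuple.ipv4 from the packet ... */
++  sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
++                         BPF_F_CURRENT_NETNS, 0);
++  if (sk) {
++          /* sk is PTR_TO_SOCKET here and may be dereferenced */
++          bpf_sk_release(sk);  /* must run before the program exits */
++  }
++  /* exiting with the reference still held fails verification with an
++   * "Unreleased reference" error, as in the examples further below */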
++
++Direct packet access
++====================
++
++In cls_bpf and act_bpf programs the verifier allows direct access to the
++packet data via skb->data and skb->data_end pointers.
++Ex::
++
++  1:  r4 = *(u32 *)(r1 +80)  /* load skb->data_end */
++  2:  r3 = *(u32 *)(r1 +76)  /* load skb->data */
++  3:  r5 = r3
++  4:  r5 += 14
++  5:  if r5 > r4 goto pc+16
++  R1=ctx R3=pkt(id=0,off=0,r=14) R4=pkt_end R5=pkt(id=0,off=14,r=14) R10=fp
++  6:  r0 = *(u16 *)(r3 +12)  /* access 12 and 13 bytes of the packet */
++
++This 2-byte load from the packet is safe to do, since the program author
++did check ``if (skb->data + 14 > skb->data_end) goto err`` at insn #5 which
++means that in the fall-through case the register R3 (which points to
++skb->data) has at least 14 directly accessible bytes. The verifier marks it
++as R3=pkt(id=0,off=0,r=14).
++id=0 means that no additional variables were added to the register.
++off=0 means that no additional constants were added.
++r=14 is the range of safe access which means that bytes [R3, R3 + 14) are ok.
++Note that R5 is marked as R5=pkt(id=0,off=14,r=14). It also points
++to the packet data, but constant 14 was added to the register, so
++it now points to ``skb->data + 14`` and the accessible range is
++[R5, R5 + 14 - 14) which is zero bytes.
++
++More complex packet access may look like::
++
++
++  R0=inv1 R1=ctx R3=pkt(id=0,off=0,r=14) R4=pkt_end R5=pkt(id=0,off=14,r=14) R10=fp
++  6:  r0 = *(u8 *)(r3 +7)  /* load 7th byte from the packet */
++  7:  r4 = *(u8 *)(r3 +12)
++  8:  r4 *= 14
++  9:  r3 = *(u32 *)(r1 +76)  /* load skb->data */
++  10:  r3 += r4
++  11:  r2 = r1
++  12:  r2 <<= 48
++  13:  r2 >>= 48
++  14:  r3 += r2
++  15:  r2 = r3
++  16:  r2 += 8
++  17:  r1 = *(u32 *)(r1 +80)  /* load skb->data_end */
++  18:  if r2 > r1 goto pc+2
++  R0=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R1=pkt_end R2=pkt(id=2,off=8,r=8) R3=pkt(id=2,off=0,r=8) R4=inv(id=0,umax_value=3570,var_off=(0x0; 0xfffe)) R5=pkt(id=0,off=14,r=14) R10=fp
++  19:  r1 = *(u8 *)(r3 +4)
++
++The state of the register R3 is R3=pkt(id=2,off=0,r=8).
++id=2 means that two ``r3 += rX`` instructions were seen, so r3 points to some
++offset within a packet and since the program author did
++``if (r3 + 8 > r1) goto err`` at insn #18, the safe range is [R3, R3 + 8).
++The verifier only allows 'add'/'sub' operations on packet registers. Any
++other operation will set the register state to 'SCALAR_VALUE' and it won't
++be available for direct packet access.
++
++The operation ``r3 += rX`` may overflow and become less than the original
++skb->data, therefore the verifier has to prevent that. So when it sees an
++``r3 += rX`` instruction and rX may hold more than a 16-bit value, any
++subsequent bounds-check of r3 against skb->data_end will not give us 'range'
++information, so attempts to read through the pointer will give an
++"invalid access to packet" error.
++
++Ex. after insn ``r4 = *(u8 *)(r3 +12)`` (insn #7 above) the state of r4 is
++R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) which means that the upper
++56 bits of the register are guaranteed to be zero, and nothing is known
++about the lower 8 bits. After insn ``r4 *= 14`` the state becomes
++R4=inv(id=0,umax_value=3570,var_off=(0x0; 0xfffe)), since multiplying an
++8-bit value by constant 14 will keep the upper 52 bits as zero, and the
++least significant bit will also be zero as 14 is even. Similarly
++``r2 >>= 48`` will make R2=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)),
++since the shift is not sign extending. This logic is implemented in the
++adjust_reg_min_max_vals() function,
++which calls adjust_ptr_min_max_vals() for adding a pointer to a scalar (or
++vice versa) and adjust_scalar_min_max_vals() for operations on two scalars.
++
++The end result is that a bpf program author can access the packet directly
++using normal C code as::
++
++  void *data = (void *)(long)skb->data;
++  void *data_end = (void *)(long)skb->data_end;
++  struct eth_hdr *eth = data;
++  struct iphdr *iph = data + sizeof(*eth);
++  struct udphdr *udp = data + sizeof(*eth) + sizeof(*iph);
++
++  if (data + sizeof(*eth) + sizeof(*iph) + sizeof(*udp) > data_end)
++          return 0;
++  if (eth->h_proto != htons(ETH_P_IP))
++          return 0;
++  if (iph->protocol != IPPROTO_UDP || iph->ihl != 5)
++          return 0;
++  if (udp->dest == 53 || udp->source == 9)
++          ...;
++
++which makes such programs easier to write compared to the LD_ABS insn,
++and significantly faster.
++
++Pruning
++=======
++
++The verifier does not actually walk all possible paths through the program.
++For each new branch to analyse, the verifier looks at all the states it's
++previously been in when at this instruction. If any of them contain the
++current state as a subset, the branch is 'pruned' - that is, the fact that
++the previous state was accepted implies the current state would be as well.
++For instance, if in the previous state r1 held a packet-pointer, and in the
++current state r1 holds a packet-pointer with a range as long or longer and
++at least as strict an alignment, then r1 is safe. Similarly, if r2 was
++NOT_INIT before then it can't have been used by any path from that point, so
++any value in r2 (including another NOT_INIT) is safe. The implementation is
++in the function regsafe().
++Pruning considers not only the registers but also the stack (and any spilled
++registers it may hold). They must all be safe for the branch to be pruned.
++This is implemented in states_equal().
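++
++As a rough mental model only - a deliberately simplified C sketch, not the
++real regsafe()/states_equal() logic, which tracks many more dimensions - the
++per-register subset test behind pruning could be pictured as::
++
++  #include <stdbool.h>
++
++  enum sketch_type { NOT_INIT, SCALAR_VALUE, PTR_TO_PACKET };
++
++  struct reg_sketch {
++          enum sketch_type type;
++          unsigned long long umin, umax;
++  };
++
++  /* 'old' is an already-accepted state at this insn; pruning is allowed
++   * only if every value 'cur' can take was also possible in 'old'. */
++  static bool reg_safe(struct reg_sketch old, struct reg_sketch cur)
++  {
++          if (old.type == NOT_INIT)
++                  return true;   /* the old path never read this register */
++          if (old.type != cur.type)
++                  return false;
++          /* cur's value range must be a subset of old's */
++          return old.umin <= cur.umin && cur.umax <= old.umax;
++  }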
++
++Understanding eBPF verifier messages
++====================================
++
++The following are a few examples of invalid eBPF programs and the verifier
++error messages as seen in the log:
++
++Program with unreachable instructions::
++
++  static struct bpf_insn prog[] = {
++  BPF_EXIT_INSN(),
++  BPF_EXIT_INSN(),
++  };
++
++Error::
++
++  unreachable insn 1
++
++Program that reads an uninitialized register::
++
++  BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
++  BPF_EXIT_INSN(),
++
++Error::
++
++  0: (bf) r0 = r2
++  R2 !read_ok
++
++Program that doesn't initialize R0 before exiting::
++
++  BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
++  BPF_EXIT_INSN(),
++
++Error::
++
++  0: (bf) r2 = r1
++  1: (95) exit
++  R0 !read_ok
++
++Program that accesses stack out of bounds::
++
++  BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
++  BPF_EXIT_INSN(),
++
++Error::
++
++  0: (7a) *(u64 *)(r10 +8) = 0
++  invalid stack off=8 size=8
++
++Program that doesn't initialize the stack before passing its address into a
++function::
++
++  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
++  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
++  BPF_LD_MAP_FD(BPF_REG_1, 0),
++  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
++  BPF_EXIT_INSN(),
++
++Error::
++
++  0: (bf) r2 = r10
++  1: (07) r2 += -8
++  2: (b7) r1 = 0x0
++  3: (85) call 1
++  invalid indirect read from stack off -8+0 size 8
++
++Program that uses an invalid map_fd=0 while calling the map_lookup_elem()
++function::
++
++  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
++  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
++  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
++  BPF_LD_MAP_FD(BPF_REG_1, 0),
++  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
++  BPF_EXIT_INSN(),
++
++Error::
++
++  0: (7a) *(u64 *)(r10 -8) = 0
++  1: (bf) r2 = r10
++  2: (07) r2 += -8
++  3: (b7) r1 = 0x0
++  4: (85) call 1
++  fd 0 is not pointing to valid bpf_map
++
++Program that doesn't check the return value of map_lookup_elem() before
++accessing a map element::
++
++  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
++  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
++  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
++  BPF_LD_MAP_FD(BPF_REG_1, 0),
++  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
++  BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
++  BPF_EXIT_INSN(),
++
++Error::
++
++  0: (7a) *(u64 *)(r10 -8) = 0
++  1: (bf) r2 = r10
++  2: (07) r2 += -8
++  3: (b7) r1 = 0x0
++  4: (85) call 1
++  5: (7a) *(u64 *)(r0 +0) = 0
++  R0 invalid mem access 'map_value_or_null'
++
++Program that correctly checks the map_lookup_elem() return value for NULL,
++but accesses the memory with incorrect alignment::
++
++  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
++  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
++  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
++  BPF_LD_MAP_FD(BPF_REG_1, 0),
++  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
++  BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
++  BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
++  BPF_EXIT_INSN(),
++
++Error::
++
++  0: (7a) *(u64 *)(r10 -8) = 0
++  1: (bf) r2 = r10
++  2: (07) r2 += -8
++  3: (b7) r1 = 1
++  4: (85) call 1
++  5: (15) if r0 == 0x0 goto pc+1
++  R0=map_ptr R10=fp
++  6: (7a) *(u64 *)(r0 +4) = 0
++  misaligned access off 4 size 8
++
++Program that correctly checks the map_lookup_elem() return value for NULL
++and accesses memory with correct alignment on one side of the 'if' branch,
++but fails to do so on the other side of the 'if' branch::
++
++  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
++  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
++  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
++  BPF_LD_MAP_FD(BPF_REG_1, 0),
++  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
++  BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
++  BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
++  BPF_EXIT_INSN(),
++  BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
++  BPF_EXIT_INSN(),
++
++Error::
++
++  0: (7a) *(u64 *)(r10 -8) = 0
++  1: (bf) r2 = r10
++  2: (07) r2 += -8
++  3: (b7) r1 = 1
++  4: (85) call 1
++  5: (15) if r0 == 0x0 goto pc+2
++  R0=map_ptr R10=fp
++  6: (7a) *(u64 *)(r0 +0) = 0
++  7: (95) exit
++
++  from 5 to 8: R0=imm0 R10=fp
++  8: (7a) *(u64 *)(r0 +0) = 1
++  R0 invalid mem access 'imm'
++
++Program that performs a socket lookup then sets the pointer to NULL without
++checking it::
++
++  BPF_MOV64_IMM(BPF_REG_2, 0),
++  BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
++  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
++  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
++  BPF_MOV64_IMM(BPF_REG_3, 4),
++  BPF_MOV64_IMM(BPF_REG_4, 0),
++  BPF_MOV64_IMM(BPF_REG_5, 0),
++  BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
++  BPF_MOV64_IMM(BPF_REG_0, 0),
++  BPF_EXIT_INSN(),
++
++Error::
++
++  0: (b7) r2 = 0
++  1: (63) *(u32 *)(r10 -8) = r2
++  2: (bf) r2 = r10
++  3: (07) r2 += -8
++  4: (b7) r3 = 4
++  5: (b7) r4 = 0
++  6: (b7) r5 = 0
++  7: (85) call bpf_sk_lookup_tcp#65
++  8: (b7) r0 = 0
++  9: (95) exit
++  Unreleased reference id=1, alloc_insn=7
++
++Program that performs a socket lookup but does not NULL-check the returned
++value::
++
++  BPF_MOV64_IMM(BPF_REG_2, 0),
++  BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
++  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
++  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
++  BPF_MOV64_IMM(BPF_REG_3, 4),
++  BPF_MOV64_IMM(BPF_REG_4, 0),
++  BPF_MOV64_IMM(BPF_REG_5, 0),
++  BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
++  BPF_EXIT_INSN(),
++
++Error::
++
++  0: (b7) r2 = 0
++  1: (63) *(u32 *)(r10 -8) = r2
++  2: (bf) r2 = r10
++  3: (07) r2 += -8
++  4: (b7) r3 = 4
++  5: (b7) r4 = 0
++  6: (b7) r5 = 0
++  7: (85) call bpf_sk_lookup_tcp#65
++  8: (95) exit
++  Unreleased reference id=1, alloc_insn=7
+--- a/Documentation/networking/filter.rst
++++ b/Documentation/networking/filter.rst
+@@ -6,6 +6,13 @@
+ Linux Socket Filtering aka Berkeley Packet Filter (BPF)
+ =======================================================
+ 
++Notice
++------
++
++This file used to document the eBPF format and mechanisms even when not
++related to socket filtering. See ../bpf/index.rst for more details
++on eBPF.
++
+ Introduction
+ ------------
+ 
+@@ -608,15 +615,11 @@ format with similar underlying principle
+ paragraphs is being used. However, the instruction set format is modelled
+ closer to the underlying architecture to mimic native instruction sets, so
+ that a better performance can be achieved (more details later). This new
+-ISA is called 'eBPF'. (Note: eBPF which
++ISA is called eBPF. See ../bpf/index.rst for details. (Note: eBPF which
+ originates from [e]xtended BPF is not the same as BPF extensions! While
+ eBPF is an ISA, BPF extensions date back to classic BPF's 'overloading'
+ of BPF_LD | BPF_{B,H,W} | BPF_ABS instruction.)
+ 
+-It is designed to be JITed with one to one mapping, which can also open up
+-the possibility for GCC/LLVM compilers to generate optimized eBPF code through
+-an eBPF backend that performs almost as fast as natively compiled code.
+- + The new instruction set was originally designed with the possible goal in + mind to write programs in "restricted C" and compile into eBPF with a optional + GCC/LLVM backend, so that it can just-in-time map to modern 64-bit CPUs with +@@ -641,986 +644,6 @@ Currently, the classic BPF format is bei + sparc64, arm32, riscv64, riscv32 perform JIT compilation from eBPF + instruction set. + +-Some core changes of the new internal format: +- +-- Number of registers increase from 2 to 10: +- +- The old format had two registers A and X, and a hidden frame pointer. The +- new layout extends this to be 10 internal registers and a read-only frame +- pointer. Since 64-bit CPUs are passing arguments to functions via registers +- the number of args from eBPF program to in-kernel function is restricted +- to 5 and one register is used to accept return value from an in-kernel +- function. Natively, x86_64 passes first 6 arguments in registers, aarch64/ +- sparcv9/mips64 have 7 - 8 registers for arguments; x86_64 has 6 callee saved +- registers, and aarch64/sparcv9/mips64 have 11 or more callee saved registers. +- +- Therefore, eBPF calling convention is defined as: +- +- * R0 - return value from in-kernel function, and exit value for eBPF program +- * R1 - R5 - arguments from eBPF program to in-kernel function +- * R6 - R9 - callee saved registers that in-kernel function will preserve +- * R10 - read-only frame pointer to access stack +- +- Thus, all eBPF registers map one to one to HW registers on x86_64, aarch64, +- etc, and eBPF calling convention maps directly to ABIs used by the kernel on +- 64-bit architectures. +- +- On 32-bit architectures JIT may map programs that use only 32-bit arithmetic +- and may let more complex programs to be interpreted. +- +- R0 - R5 are scratch registers and eBPF program needs spill/fill them if +- necessary across calls. Note that there is only one eBPF program (== one +- eBPF main routine) and it cannot call other eBPF functions, it can only +- call predefined in-kernel functions, though. +- +-- Register width increases from 32-bit to 64-bit: +- +- Still, the semantics of the original 32-bit ALU operations are preserved +- via 32-bit subregisters. All eBPF registers are 64-bit with 32-bit lower +- subregisters that zero-extend into 64-bit if they are being written to. +- That behavior maps directly to x86_64 and arm64 subregister definition, but +- makes other JITs more difficult. +- +- 32-bit architectures run 64-bit eBPF programs via interpreter. +- Their JITs may convert BPF programs that only use 32-bit subregisters into +- native instruction set and let the rest being interpreted. +- +- Operation is 64-bit, because on 64-bit architectures, pointers are also +- 64-bit wide, and we want to pass 64-bit values in/out of kernel functions, +- so 32-bit eBPF registers would otherwise require to define register-pair +- ABI, thus, there won't be able to use a direct eBPF register to HW register +- mapping and JIT would need to do combine/split/move operations for every +- register in and out of the function, which is complex, bug prone and slow. +- Another reason is the use of atomic 64-bit counters. +- +-- Conditional jt/jf targets replaced with jt/fall-through: +- +- While the original design has constructs such as ``if (cond) jump_true; +- else jump_false;``, they are being replaced into alternative constructs like +- ``if (cond) jump_true; /* else fall-through */``. 
+- +-- Introduces bpf_call insn and register passing convention for zero overhead +- calls from/to other kernel functions: +- +- Before an in-kernel function call, the eBPF program needs to +- place function arguments into R1 to R5 registers to satisfy calling +- convention, then the interpreter will take them from registers and pass +- to in-kernel function. If R1 - R5 registers are mapped to CPU registers +- that are used for argument passing on given architecture, the JIT compiler +- doesn't need to emit extra moves. Function arguments will be in the correct +- registers and BPF_CALL instruction will be JITed as single 'call' HW +- instruction. This calling convention was picked to cover common call +- situations without performance penalty. +- +- After an in-kernel function call, R1 - R5 are reset to unreadable and R0 has +- a return value of the function. Since R6 - R9 are callee saved, their state +- is preserved across the call. +- +- For example, consider three C functions:: +- +- u64 f1() { return (*_f2)(1); } +- u64 f2(u64 a) { return f3(a + 1, a); } +- u64 f3(u64 a, u64 b) { return a - b; } +- +- GCC can compile f1, f3 into x86_64:: +- +- f1: +- movl $1, %edi +- movq _f2(%rip), %rax +- jmp *%rax +- f3: +- movq %rdi, %rax +- subq %rsi, %rax +- ret +- +- Function f2 in eBPF may look like:: +- +- f2: +- bpf_mov R2, R1 +- bpf_add R1, 1 +- bpf_call f3 +- bpf_exit +- +- If f2 is JITed and the pointer stored to ``_f2``. The calls f1 -> f2 -> f3 and +- returns will be seamless. Without JIT, __bpf_prog_run() interpreter needs to +- be used to call into f2. +- +- For practical reasons all eBPF programs have only one argument 'ctx' which is +- already placed into R1 (e.g. on __bpf_prog_run() startup) and the programs +- can call kernel functions with up to 5 arguments. Calls with 6 or more arguments +- are currently not supported, but these restrictions can be lifted if necessary +- in the future. +- +- On 64-bit architectures all register map to HW registers one to one. For +- example, x86_64 JIT compiler can map them as ... +- +- :: +- +- R0 - rax +- R1 - rdi +- R2 - rsi +- R3 - rdx +- R4 - rcx +- R5 - r8 +- R6 - rbx +- R7 - r13 +- R8 - r14 +- R9 - r15 +- R10 - rbp +- +- ... since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing +- and rbx, r12 - r15 are callee saved. +- +- Then the following eBPF pseudo-program:: +- +- bpf_mov R6, R1 /* save ctx */ +- bpf_mov R2, 2 +- bpf_mov R3, 3 +- bpf_mov R4, 4 +- bpf_mov R5, 5 +- bpf_call foo +- bpf_mov R7, R0 /* save foo() return value */ +- bpf_mov R1, R6 /* restore ctx for next call */ +- bpf_mov R2, 6 +- bpf_mov R3, 7 +- bpf_mov R4, 8 +- bpf_mov R5, 9 +- bpf_call bar +- bpf_add R0, R7 +- bpf_exit +- +- After JIT to x86_64 may look like:: +- +- push %rbp +- mov %rsp,%rbp +- sub $0x228,%rsp +- mov %rbx,-0x228(%rbp) +- mov %r13,-0x220(%rbp) +- mov %rdi,%rbx +- mov $0x2,%esi +- mov $0x3,%edx +- mov $0x4,%ecx +- mov $0x5,%r8d +- callq foo +- mov %rax,%r13 +- mov %rbx,%rdi +- mov $0x6,%esi +- mov $0x7,%edx +- mov $0x8,%ecx +- mov $0x9,%r8d +- callq bar +- add %r13,%rax +- mov -0x228(%rbp),%rbx +- mov -0x220(%rbp),%r13 +- leaveq +- retq +- +- Which is in this example equivalent in C to:: +- +- u64 bpf_filter(u64 ctx) +- { +- return foo(ctx, 2, 3, 4, 5) + bar(ctx, 6, 7, 8, 9); +- } +- +- In-kernel functions foo() and bar() with prototype: u64 (*)(u64 arg1, u64 +- arg2, u64 arg3, u64 arg4, u64 arg5); will receive arguments in proper +- registers and place their return value into ``%rax`` which is R0 in eBPF. 
+- Prologue and epilogue are emitted by JIT and are implicit in the +- interpreter. R0-R5 are scratch registers, so eBPF program needs to preserve +- them across the calls as defined by calling convention. +- +- For example the following program is invalid:: +- +- bpf_mov R1, 1 +- bpf_call foo +- bpf_mov R0, R1 +- bpf_exit +- +- After the call the registers R1-R5 contain junk values and cannot be read. +- An in-kernel eBPF verifier is used to validate eBPF programs. +- +-Also in the new design, eBPF is limited to 4096 insns, which means that any +-program will terminate quickly and will only call a fixed number of kernel +-functions. Original BPF and the new format are two operand instructions, +-which helps to do one-to-one mapping between eBPF insn and x86 insn during JIT. +- +-The input context pointer for invoking the interpreter function is generic, +-its content is defined by a specific use case. For seccomp register R1 points +-to seccomp_data, for converted BPF filters R1 points to a skb. +- +-A program, that is translated internally consists of the following elements:: +- +- op:16, jt:8, jf:8, k:32 ==> op:8, dst_reg:4, src_reg:4, off:16, imm:32 +- +-So far 87 eBPF instructions were implemented. 8-bit 'op' opcode field +-has room for new instructions. Some of them may use 16/24/32 byte encoding. New +-instructions must be multiple of 8 bytes to preserve backward compatibility. +- +-eBPF is a general purpose RISC instruction set. Not every register and +-every instruction are used during translation from original BPF to new format. +-For example, socket filters are not using ``exclusive add`` instruction, but +-tracing filters may do to maintain counters of events, for example. Register R9 +-is not used by socket filters either, but more complex filters may be running +-out of registers and would have to resort to spill/fill to stack. +- +-eBPF can be used as a generic assembler for last step performance +-optimizations, socket filters and seccomp are using it as assembler. Tracing +-filters may use it as assembler to generate code from kernel. In kernel usage +-may not be bounded by security considerations, since generated eBPF code +-may be optimizing internal code path and not being exposed to the user space. +-Safety of eBPF can come from a verifier (TBD). In such use cases as +-described, it may be used as safe instruction set. +- +-Just like the original BPF, the new format runs within a controlled environment, +-is deterministic and the kernel can easily prove that. The safety of the program +-can be determined in two steps: first step does depth-first-search to disallow +-loops and other CFG validation; second step starts from the first insn and +-descends all possible paths. It simulates execution of every insn and observes +-the state change of registers and stack. +- +-eBPF opcode encoding +--------------------- +- +-eBPF is reusing most of the opcode encoding from classic to simplify conversion +-of classic BPF to eBPF. 
For arithmetic and jump instructions the 8-bit 'code' +-field is divided into three parts:: +- +- +----------------+--------+--------------------+ +- | 4 bits | 1 bit | 3 bits | +- | operation code | source | instruction class | +- +----------------+--------+--------------------+ +- (MSB) (LSB) +- +-Three LSB bits store instruction class which is one of: +- +- =================== =============== +- Classic BPF classes eBPF classes +- =================== =============== +- BPF_LD 0x00 BPF_LD 0x00 +- BPF_LDX 0x01 BPF_LDX 0x01 +- BPF_ST 0x02 BPF_ST 0x02 +- BPF_STX 0x03 BPF_STX 0x03 +- BPF_ALU 0x04 BPF_ALU 0x04 +- BPF_JMP 0x05 BPF_JMP 0x05 +- BPF_RET 0x06 BPF_JMP32 0x06 +- BPF_MISC 0x07 BPF_ALU64 0x07 +- =================== =============== +- +-When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ... +- +- :: +- +- BPF_K 0x00 +- BPF_X 0x08 +- +- * in classic BPF, this means:: +- +- BPF_SRC(code) == BPF_X - use register X as source operand +- BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand +- +- * in eBPF, this means:: +- +- BPF_SRC(code) == BPF_X - use 'src_reg' register as source operand +- BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand +- +-... and four MSB bits store operation code. +- +-If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of:: +- +- BPF_ADD 0x00 +- BPF_SUB 0x10 +- BPF_MUL 0x20 +- BPF_DIV 0x30 +- BPF_OR 0x40 +- BPF_AND 0x50 +- BPF_LSH 0x60 +- BPF_RSH 0x70 +- BPF_NEG 0x80 +- BPF_MOD 0x90 +- BPF_XOR 0xa0 +- BPF_MOV 0xb0 /* eBPF only: mov reg to reg */ +- BPF_ARSH 0xc0 /* eBPF only: sign extending shift right */ +- BPF_END 0xd0 /* eBPF only: endianness conversion */ +- +-If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of:: +- +- BPF_JA 0x00 /* BPF_JMP only */ +- BPF_JEQ 0x10 +- BPF_JGT 0x20 +- BPF_JGE 0x30 +- BPF_JSET 0x40 +- BPF_JNE 0x50 /* eBPF only: jump != */ +- BPF_JSGT 0x60 /* eBPF only: signed '>' */ +- BPF_JSGE 0x70 /* eBPF only: signed '>=' */ +- BPF_CALL 0x80 /* eBPF BPF_JMP only: function call */ +- BPF_EXIT 0x90 /* eBPF BPF_JMP only: function return */ +- BPF_JLT 0xa0 /* eBPF only: unsigned '<' */ +- BPF_JLE 0xb0 /* eBPF only: unsigned '<=' */ +- BPF_JSLT 0xc0 /* eBPF only: signed '<' */ +- BPF_JSLE 0xd0 /* eBPF only: signed '<=' */ +- +-So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF +-and eBPF. There are only two registers in classic BPF, so it means A += X. +-In eBPF it means dst_reg = (u32) dst_reg + (u32) src_reg; similarly, +-BPF_XOR | BPF_K | BPF_ALU means A ^= imm32 in classic BPF and analogous +-src_reg = (u32) src_reg ^ (u32) imm32 in eBPF. +- +-Classic BPF is using BPF_MISC class to represent A = X and X = A moves. +-eBPF is using BPF_MOV | BPF_X | BPF_ALU code instead. Since there are no +-BPF_MISC operations in eBPF, the class 7 is used as BPF_ALU64 to mean +-exactly the same operations as BPF_ALU, but with 64-bit wide operands +-instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.: +-dst_reg = dst_reg + src_reg +- +-Classic BPF wastes the whole BPF_RET class to represent a single ``ret`` +-operation. Classic BPF_RET | BPF_K means copy imm32 into return register +-and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT +-in eBPF means function exit only. The eBPF program needs to store return +-value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is used as +-BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide +-operands for the comparisons instead. 
+- +-For load and store instructions the 8-bit 'code' field is divided as:: +- +- +--------+--------+-------------------+ +- | 3 bits | 2 bits | 3 bits | +- | mode | size | instruction class | +- +--------+--------+-------------------+ +- (MSB) (LSB) +- +-Size modifier is one of ... +- +-:: +- +- BPF_W 0x00 /* word */ +- BPF_H 0x08 /* half word */ +- BPF_B 0x10 /* byte */ +- BPF_DW 0x18 /* eBPF only, double word */ +- +-... which encodes size of load/store operation:: +- +- B - 1 byte +- H - 2 byte +- W - 4 byte +- DW - 8 byte (eBPF only) +- +-Mode modifier is one of:: +- +- BPF_IMM 0x00 /* used for 32-bit mov in classic BPF and 64-bit in eBPF */ +- BPF_ABS 0x20 +- BPF_IND 0x40 +- BPF_MEM 0x60 +- BPF_LEN 0x80 /* classic BPF only, reserved in eBPF */ +- BPF_MSH 0xa0 /* classic BPF only, reserved in eBPF */ +- BPF_ATOMIC 0xc0 /* eBPF only, atomic operations */ +- +-eBPF has two non-generic instructions: (BPF_ABS | | BPF_LD) and +-(BPF_IND | | BPF_LD) which are used to access packet data. +- +-They had to be carried over from classic to have strong performance of +-socket filters running in eBPF interpreter. These instructions can only +-be used when interpreter context is a pointer to ``struct sk_buff`` and +-have seven implicit operands. Register R6 is an implicit input that must +-contain pointer to sk_buff. Register R0 is an implicit output which contains +-the data fetched from the packet. Registers R1-R5 are scratch registers +-and must not be used to store the data across BPF_ABS | BPF_LD or +-BPF_IND | BPF_LD instructions. +- +-These instructions have implicit program exit condition as well. When +-eBPF program is trying to access the data beyond the packet boundary, +-the interpreter will abort the execution of the program. JIT compilers +-therefore must preserve this property. src_reg and imm32 fields are +-explicit inputs to these instructions. +- +-For example:: +- +- BPF_IND | BPF_W | BPF_LD means: +- +- R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32)) +- and R1 - R5 were scratched. +- +-Unlike classic BPF instruction set, eBPF has generic load/store operations:: +- +- BPF_MEM | | BPF_STX: *(size *) (dst_reg + off) = src_reg +- BPF_MEM | | BPF_ST: *(size *) (dst_reg + off) = imm32 +- BPF_MEM | | BPF_LDX: dst_reg = *(size *) (src_reg + off) +- +-Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. +- +-It also includes atomic operations, which use the immediate field for extra +-encoding:: +- +- .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg +- .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg +- +-The basic atomic operations supported are:: +- +- BPF_ADD +- BPF_AND +- BPF_OR +- BPF_XOR +- +-Each having equivalent semantics with the ``BPF_ADD`` example, that is: the +-memory location addresed by ``dst_reg + off`` is atomically modified, with +-``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the +-immediate, then these operations also overwrite ``src_reg`` with the +-value that was in memory before it was modified. +- +-The more special operations are:: +- +- BPF_XCHG +- +-This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg + +-off``. :: +- +- BPF_CMPXCHG +- +-This atomically compares the value addressed by ``dst_reg + off`` with +-``R0``. If they match it is replaced with ``src_reg``. In either case, the +-value that was there before is zero-extended and loaded back to ``R0``. 
+- +-Note that 1 and 2 byte atomic operations are not supported. +- +-Clang can generate atomic instructions by default when ``-mcpu=v3`` is +-enabled. If a lower version for ``-mcpu`` is set, the only atomic instruction +-Clang can generate is ``BPF_ADD`` *without* ``BPF_FETCH``. If you need to enable +-the atomics features, while keeping a lower ``-mcpu`` version, you can use +-``-Xclang -target-feature -Xclang +alu32``. +- +-You may encounter ``BPF_XADD`` - this is a legacy name for ``BPF_ATOMIC``, +-referring to the exclusive-add operation encoded when the immediate field is +-zero. +- +-eBPF has one 16-byte instruction: ``BPF_LD | BPF_DW | BPF_IMM`` which consists +-of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single +-instruction that loads 64-bit immediate value into a dst_reg. +-Classic BPF has similar instruction: ``BPF_LD | BPF_W | BPF_IMM`` which loads +-32-bit immediate value into a register. +- +-eBPF verifier +-------------- +-The safety of the eBPF program is determined in two steps. +- +-First step does DAG check to disallow loops and other CFG validation. +-In particular it will detect programs that have unreachable instructions. +-(though classic BPF checker allows them) +- +-Second step starts from the first insn and descends all possible paths. +-It simulates execution of every insn and observes the state change of +-registers and stack. +- +-At the start of the program the register R1 contains a pointer to context +-and has type PTR_TO_CTX. +-If verifier sees an insn that does R2=R1, then R2 has now type +-PTR_TO_CTX as well and can be used on the right hand side of expression. +-If R1=PTR_TO_CTX and insn is R2=R1+R1, then R2=SCALAR_VALUE, +-since addition of two valid pointers makes invalid pointer. +-(In 'secure' mode verifier will reject any type of pointer arithmetic to make +-sure that kernel addresses don't leak to unprivileged users) +- +-If register was never written to, it's not readable:: +- +- bpf_mov R0 = R2 +- bpf_exit +- +-will be rejected, since R2 is unreadable at the start of the program. +- +-After kernel function call, R1-R5 are reset to unreadable and +-R0 has a return type of the function. +- +-Since R6-R9 are callee saved, their state is preserved across the call. +- +-:: +- +- bpf_mov R6 = 1 +- bpf_call foo +- bpf_mov R0 = R6 +- bpf_exit +- +-is a correct program. If there was R1 instead of R6, it would have +-been rejected. +- +-load/store instructions are allowed only with registers of valid types, which +-are PTR_TO_CTX, PTR_TO_MAP, PTR_TO_STACK. They are bounds and alignment checked. +-For example:: +- +- bpf_mov R1 = 1 +- bpf_mov R2 = 2 +- bpf_xadd *(u32 *)(R1 + 3) += R2 +- bpf_exit +- +-will be rejected, since R1 doesn't have a valid pointer type at the time of +-execution of instruction bpf_xadd. +- +-At the start R1 type is PTR_TO_CTX (a pointer to generic ``struct bpf_context``) +-A callback is used to customize verifier to restrict eBPF program access to only +-certain fields within ctx structure with specified size and alignment. +- +-For example, the following insn:: +- +- bpf_ld R0 = *(u32 *)(R6 + 8) +- +-intends to load a word from address R6 + 8 and store it into R0 +-If R6=PTR_TO_CTX, via is_valid_access() callback the verifier will know +-that offset 8 of size 4 bytes can be accessed for reading, otherwise +-the verifier will reject the program. +-If R6=PTR_TO_STACK, then access should be aligned and be within +-stack bounds, which are [-MAX_BPF_STACK, 0). 
In this example offset is 8, +-so it will fail verification, since it's out of bounds. +- +-The verifier will allow eBPF program to read data from stack only after +-it wrote into it. +- +-Classic BPF verifier does similar check with M[0-15] memory slots. +-For example:: +- +- bpf_ld R0 = *(u32 *)(R10 - 4) +- bpf_exit +- +-is invalid program. +-Though R10 is correct read-only register and has type PTR_TO_STACK +-and R10 - 4 is within stack bounds, there were no stores into that location. +- +-Pointer register spill/fill is tracked as well, since four (R6-R9) +-callee saved registers may not be enough for some programs. +- +-Allowed function calls are customized with bpf_verifier_ops->get_func_proto() +-The eBPF verifier will check that registers match argument constraints. +-After the call register R0 will be set to return type of the function. +- +-Function calls is a main mechanism to extend functionality of eBPF programs. +-Socket filters may let programs to call one set of functions, whereas tracing +-filters may allow completely different set. +- +-If a function made accessible to eBPF program, it needs to be thought through +-from safety point of view. The verifier will guarantee that the function is +-called with valid arguments. +- +-seccomp vs socket filters have different security restrictions for classic BPF. +-Seccomp solves this by two stage verifier: classic BPF verifier is followed +-by seccomp verifier. In case of eBPF one configurable verifier is shared for +-all use cases. +- +-See details of eBPF verifier in kernel/bpf/verifier.c +- +-Register value tracking +------------------------ +-In order to determine the safety of an eBPF program, the verifier must track +-the range of possible values in each register and also in each stack slot. +-This is done with ``struct bpf_reg_state``, defined in include/linux/ +-bpf_verifier.h, which unifies tracking of scalar and pointer values. Each +-register state has a type, which is either NOT_INIT (the register has not been +-written to), SCALAR_VALUE (some value which is not usable as a pointer), or a +-pointer type. The types of pointers describe their base, as follows: +- +- +- PTR_TO_CTX +- Pointer to bpf_context. +- CONST_PTR_TO_MAP +- Pointer to struct bpf_map. "Const" because arithmetic +- on these pointers is forbidden. +- PTR_TO_MAP_VALUE +- Pointer to the value stored in a map element. +- PTR_TO_MAP_VALUE_OR_NULL +- Either a pointer to a map value, or NULL; map accesses +- (see maps.rst) return this type, which becomes a +- a PTR_TO_MAP_VALUE when checked != NULL. Arithmetic on +- these pointers is forbidden. +- PTR_TO_STACK +- Frame pointer. +- PTR_TO_PACKET +- skb->data. +- PTR_TO_PACKET_END +- skb->data + headlen; arithmetic forbidden. +- PTR_TO_SOCKET +- Pointer to struct bpf_sock_ops, implicitly refcounted. +- PTR_TO_SOCKET_OR_NULL +- Either a pointer to a socket, or NULL; socket lookup +- returns this type, which becomes a PTR_TO_SOCKET when +- checked != NULL. PTR_TO_SOCKET is reference-counted, +- so programs must release the reference through the +- socket release function before the end of the program. +- Arithmetic on these pointers is forbidden. +- +-However, a pointer may be offset from this base (as a result of pointer +-arithmetic), and this is tracked in two parts: the 'fixed offset' and 'variable +-offset'. The former is used when an exactly-known value (e.g. an immediate +-operand) is added to a pointer, while the latter is used for values which are +-not exactly known. 
The variable offset is also used in SCALAR_VALUEs, to track +-the range of possible values in the register. +- +-The verifier's knowledge about the variable offset consists of: +- +-* minimum and maximum values as unsigned +-* minimum and maximum values as signed +- +-* knowledge of the values of individual bits, in the form of a 'tnum': a u64 +- 'mask' and a u64 'value'. 1s in the mask represent bits whose value is unknown; +- 1s in the value represent bits known to be 1. Bits known to be 0 have 0 in both +- mask and value; no bit should ever be 1 in both. For example, if a byte is read +- into a register from memory, the register's top 56 bits are known zero, while +- the low 8 are unknown - which is represented as the tnum (0x0; 0xff). If we +- then OR this with 0x40, we get (0x40; 0xbf), then if we add 1 we get (0x0; +- 0x1ff), because of potential carries. +- +-Besides arithmetic, the register state can also be updated by conditional +-branches. For instance, if a SCALAR_VALUE is compared > 8, in the 'true' branch +-it will have a umin_value (unsigned minimum value) of 9, whereas in the 'false' +-branch it will have a umax_value of 8. A signed compare (with BPF_JSGT or +-BPF_JSGE) would instead update the signed minimum/maximum values. Information +-from the signed and unsigned bounds can be combined; for instance if a value is +-first tested < 8 and then tested s> 4, the verifier will conclude that the value +-is also > 4 and s< 8, since the bounds prevent crossing the sign boundary. +- +-PTR_TO_PACKETs with a variable offset part have an 'id', which is common to all +-pointers sharing that same variable offset. This is important for packet range +-checks: after adding a variable to a packet pointer register A, if you then copy +-it to another register B and then add a constant 4 to A, both registers will +-share the same 'id' but the A will have a fixed offset of +4. Then if A is +-bounds-checked and found to be less than a PTR_TO_PACKET_END, the register B is +-now known to have a safe range of at least 4 bytes. See 'Direct packet access', +-below, for more on PTR_TO_PACKET ranges. +- +-The 'id' field is also used on PTR_TO_MAP_VALUE_OR_NULL, common to all copies of +-the pointer returned from a map lookup. This means that when one copy is +-checked and found to be non-NULL, all copies can become PTR_TO_MAP_VALUEs. +-As well as range-checking, the tracked information is also used for enforcing +-alignment of pointer accesses. For instance, on most systems the packet pointer +-is 2 bytes after a 4-byte alignment. If a program adds 14 bytes to that to jump +-over the Ethernet header, then reads IHL and addes (IHL * 4), the resulting +-pointer will have a variable offset known to be 4n+2 for some n, so adding the 2 +-bytes (NET_IP_ALIGN) gives a 4-byte alignment and so word-sized accesses through +-that pointer are safe. +-The 'id' field is also used on PTR_TO_SOCKET and PTR_TO_SOCKET_OR_NULL, common +-to all copies of the pointer returned from a socket lookup. This has similar +-behaviour to the handling for PTR_TO_MAP_VALUE_OR_NULL->PTR_TO_MAP_VALUE, but +-it also handles reference tracking for the pointer. PTR_TO_SOCKET implicitly +-represents a reference to the corresponding ``struct sock``. To ensure that the +-reference is not leaked, it is imperative to NULL-check the reference and in +-the non-NULL case, and pass the valid reference to the socket release function. 
+-
+-Direct packet access
+---------------------
+-In cls_bpf and act_bpf programs the verifier allows direct access to the packet
+-data via skb->data and skb->data_end pointers.
+-Ex::
+-
+-  1: r4 = *(u32 *)(r1 +80)              /* load skb->data_end */
+-  2: r3 = *(u32 *)(r1 +76)              /* load skb->data */
+-  3: r5 = r3
+-  4: r5 += 14
+-  5: if r5 > r4 goto pc+16
+-  R1=ctx R3=pkt(id=0,off=0,r=14) R4=pkt_end R5=pkt(id=0,off=14,r=14) R10=fp
+-  6: r0 = *(u16 *)(r3 +12)              /* access 12 and 13 bytes of the packet */
+-
+-this 2-byte load from the packet is safe to do, since the program author
+-did check ``if (skb->data + 14 > skb->data_end) goto err`` at insn #5 which
+-means that in the fall-through case the register R3 (which points to skb->data)
+-has at least 14 directly accessible bytes. The verifier marks it
+-as R3=pkt(id=0,off=0,r=14).
+-id=0 means that no additional variables were added to the register.
+-off=0 means that no additional constants were added.
+-r=14 is the range of safe access which means that bytes [R3, R3 + 14) are ok.
+-Note that R5 is marked as R5=pkt(id=0,off=14,r=14). It also points
+-to the packet data, but constant 14 was added to the register, so
+-it now points to ``skb->data + 14`` and accessible range is [R5, R5 + 14 - 14)
+-which is zero bytes.
+-
+-More complex packet access may look like::
+-
+-
+-  R0=inv1 R1=ctx R3=pkt(id=0,off=0,r=14) R4=pkt_end R5=pkt(id=0,off=14,r=14) R10=fp
+-  6: r0 = *(u8 *)(r3 +7)                /* load 7th byte from the packet */
+-  7: r4 = *(u8 *)(r3 +12)
+-  8: r4 *= 14
+-  9: r3 = *(u32 *)(r1 +76)              /* load skb->data */
+-  10: r3 += r4
+-  11: r2 = r1
+-  12: r2 <<= 48
+-  13: r2 >>= 48
+-  14: r3 += r2
+-  15: r2 = r3
+-  16: r2 += 8
+-  17: r1 = *(u32 *)(r1 +80)             /* load skb->data_end */
+-  18: if r2 > r1 goto pc+2
+-  R0=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R1=pkt_end R2=pkt(id=2,off=8,r=8) R3=pkt(id=2,off=0,r=8) R4=inv(id=0,umax_value=3570,var_off=(0x0; 0xfffe)) R5=pkt(id=0,off=14,r=14) R10=fp
+-  19: r1 = *(u8 *)(r3 +4)
+-
+-The state of the register R3 is R3=pkt(id=2,off=0,r=8)
+-id=2 means that two ``r3 += rX`` instructions were seen, so r3 points to some
+-offset within a packet and since the program author did
+-``if (r3 + 8 > r1) goto err`` at insn #18, the safe range is [R3, R3 + 8).
+-The verifier only allows 'add'/'sub' operations on packet registers. Any other
+-operation will set the register state to 'SCALAR_VALUE' and it won't be
+-available for direct packet access.
+-
+-Operation ``r3 += rX`` may overflow and become less than original skb->data,
+-therefore the verifier has to prevent that. So when it sees ``r3 += rX``
+-instruction and rX is more than a 16-bit value, any subsequent bounds-check of
+-r3 against skb->data_end will not give us 'range' information, so attempts to
+-read through the pointer will give "invalid access to packet" error.
+-
+-Ex. after insn ``r4 = *(u8 *)(r3 +12)`` (insn #7 above) the state of r4 is
+-R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) which means that upper 56 bits
+-of the register are guaranteed to be zero, and nothing is known about the lower
+-8 bits. After insn ``r4 *= 14`` the state becomes
+-R4=inv(id=0,umax_value=3570,var_off=(0x0; 0xfffe)), since multiplying an 8-bit
+-value by constant 14 will keep upper 52 bits as zero, also the least significant
+-bit will be zero as 14 is even. Similarly ``r2 >>= 48`` will make
+-R2=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)), since the shift is not sign
+-extending.
This logic is implemented in the adjust_reg_min_max_vals() function,
+-which calls adjust_ptr_min_max_vals() for adding a pointer to a scalar (or vice
+-versa) and adjust_scalar_min_max_vals() for operations on two scalars.
+-
+-The end result is that a BPF program author can access the packet directly
+-using normal C code as::
+-
+-  void *data = (void *)(long)skb->data;
+-  void *data_end = (void *)(long)skb->data_end;
+-  struct eth_hdr *eth = data;
+-  struct iphdr *iph = data + sizeof(*eth);
+-  struct udphdr *udp = data + sizeof(*eth) + sizeof(*iph);
+-
+-  if (data + sizeof(*eth) + sizeof(*iph) + sizeof(*udp) > data_end)
+-          return 0;
+-  if (eth->h_proto != htons(ETH_P_IP))
+-          return 0;
+-  if (iph->protocol != IPPROTO_UDP || iph->ihl != 5)
+-          return 0;
+-  if (udp->dest == 53 || udp->source == 9)
+-          ...;
+-
+-which makes such programs easier to write compared to the LD_ABS insn
+-and significantly faster.
+-
+-Pruning
+--------
+-The verifier does not actually walk all possible paths through the program. For
+-each new branch to analyse, the verifier looks at all the states it's previously
+-been in when at this instruction. If any of them contain the current state as a
+-subset, the branch is 'pruned' - that is, the fact that the previous state was
+-accepted implies the current state would be as well. For instance, if in the
+-previous state, r1 held a packet-pointer, and in the current state, r1 holds a
+-packet-pointer with a range as long or longer and at least as strict an
+-alignment, then r1 is safe. Similarly, if r2 was NOT_INIT before then it can't
+-have been used by any path from that point, so any value in r2 (including
+-another NOT_INIT) is safe. The implementation is in the function regsafe().
+-Pruning considers not only the registers but also the stack (and any spilled
+-registers it may hold). They must all be safe for the branch to be pruned.
+-This is implemented in states_equal().
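Schematically, using the register-state notation from the examples above (an
editorial illustration with invented bounds, not from the document)::

  /* first path, already verified at this instruction:          */
  /*        r1=inv(id=0,umax_value=10)                          */
  /* second path arriving at the same instruction:              */
  /*        r1=inv(id=0,umin_value=3,umax_value=10)             */
  /* [3, 10] is contained in [0, 10], so the second state is a  */
  /* subset of the first and this branch is pruned              */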
+- +-Understanding eBPF verifier messages +------------------------------------- +- +-The following are few examples of invalid eBPF programs and verifier error +-messages as seen in the log: +- +-Program with unreachable instructions:: +- +- static struct bpf_insn prog[] = { +- BPF_EXIT_INSN(), +- BPF_EXIT_INSN(), +- }; +- +-Error: +- +- unreachable insn 1 +- +-Program that reads uninitialized register:: +- +- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), +- BPF_EXIT_INSN(), +- +-Error:: +- +- 0: (bf) r0 = r2 +- R2 !read_ok +- +-Program that doesn't initialize R0 before exiting:: +- +- BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), +- BPF_EXIT_INSN(), +- +-Error:: +- +- 0: (bf) r2 = r1 +- 1: (95) exit +- R0 !read_ok +- +-Program that accesses stack out of bounds:: +- +- BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0), +- BPF_EXIT_INSN(), +- +-Error:: +- +- 0: (7a) *(u64 *)(r10 +8) = 0 +- invalid stack off=8 size=8 +- +-Program that doesn't initialize stack before passing its address into function:: +- +- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), +- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), +- BPF_LD_MAP_FD(BPF_REG_1, 0), +- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), +- BPF_EXIT_INSN(), +- +-Error:: +- +- 0: (bf) r2 = r10 +- 1: (07) r2 += -8 +- 2: (b7) r1 = 0x0 +- 3: (85) call 1 +- invalid indirect read from stack off -8+0 size 8 +- +-Program that uses invalid map_fd=0 while calling to map_lookup_elem() function:: +- +- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), +- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), +- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), +- BPF_LD_MAP_FD(BPF_REG_1, 0), +- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), +- BPF_EXIT_INSN(), +- +-Error:: +- +- 0: (7a) *(u64 *)(r10 -8) = 0 +- 1: (bf) r2 = r10 +- 2: (07) r2 += -8 +- 3: (b7) r1 = 0x0 +- 4: (85) call 1 +- fd 0 is not pointing to valid bpf_map +- +-Program that doesn't check return value of map_lookup_elem() before accessing +-map element:: +- +- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), +- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), +- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), +- BPF_LD_MAP_FD(BPF_REG_1, 0), +- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), +- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), +- BPF_EXIT_INSN(), +- +-Error:: +- +- 0: (7a) *(u64 *)(r10 -8) = 0 +- 1: (bf) r2 = r10 +- 2: (07) r2 += -8 +- 3: (b7) r1 = 0x0 +- 4: (85) call 1 +- 5: (7a) *(u64 *)(r0 +0) = 0 +- R0 invalid mem access 'map_value_or_null' +- +-Program that correctly checks map_lookup_elem() returned value for NULL, but +-accesses the memory with incorrect alignment:: +- +- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), +- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), +- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), +- BPF_LD_MAP_FD(BPF_REG_1, 0), +- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), +- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), +- BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0), +- BPF_EXIT_INSN(), +- +-Error:: +- +- 0: (7a) *(u64 *)(r10 -8) = 0 +- 1: (bf) r2 = r10 +- 2: (07) r2 += -8 +- 3: (b7) r1 = 1 +- 4: (85) call 1 +- 5: (15) if r0 == 0x0 goto pc+1 +- R0=map_ptr R10=fp +- 6: (7a) *(u64 *)(r0 +4) = 0 +- misaligned access off 4 size 8 +- +-Program that correctly checks map_lookup_elem() returned value for NULL and +-accesses memory with correct alignment in one side of 'if' branch, but fails +-to do so in the other side of 'if' branch:: +- +- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), +- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), +- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), +- BPF_LD_MAP_FD(BPF_REG_1, 0), +- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 
0, 0, BPF_FUNC_map_lookup_elem), +- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), +- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), +- BPF_EXIT_INSN(), +- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1), +- BPF_EXIT_INSN(), +- +-Error:: +- +- 0: (7a) *(u64 *)(r10 -8) = 0 +- 1: (bf) r2 = r10 +- 2: (07) r2 += -8 +- 3: (b7) r1 = 1 +- 4: (85) call 1 +- 5: (15) if r0 == 0x0 goto pc+2 +- R0=map_ptr R10=fp +- 6: (7a) *(u64 *)(r0 +0) = 0 +- 7: (95) exit +- +- from 5 to 8: R0=imm0 R10=fp +- 8: (7a) *(u64 *)(r0 +0) = 1 +- R0 invalid mem access 'imm' +- +-Program that performs a socket lookup then sets the pointer to NULL without +-checking it:: +- +- BPF_MOV64_IMM(BPF_REG_2, 0), +- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), +- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), +- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), +- BPF_MOV64_IMM(BPF_REG_3, 4), +- BPF_MOV64_IMM(BPF_REG_4, 0), +- BPF_MOV64_IMM(BPF_REG_5, 0), +- BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp), +- BPF_MOV64_IMM(BPF_REG_0, 0), +- BPF_EXIT_INSN(), +- +-Error:: +- +- 0: (b7) r2 = 0 +- 1: (63) *(u32 *)(r10 -8) = r2 +- 2: (bf) r2 = r10 +- 3: (07) r2 += -8 +- 4: (b7) r3 = 4 +- 5: (b7) r4 = 0 +- 6: (b7) r5 = 0 +- 7: (85) call bpf_sk_lookup_tcp#65 +- 8: (b7) r0 = 0 +- 9: (95) exit +- Unreleased reference id=1, alloc_insn=7 +- +-Program that performs a socket lookup but does not NULL-check the returned +-value:: +- +- BPF_MOV64_IMM(BPF_REG_2, 0), +- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), +- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), +- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), +- BPF_MOV64_IMM(BPF_REG_3, 4), +- BPF_MOV64_IMM(BPF_REG_4, 0), +- BPF_MOV64_IMM(BPF_REG_5, 0), +- BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp), +- BPF_EXIT_INSN(), +- +-Error:: +- +- 0: (b7) r2 = 0 +- 1: (63) *(u32 *)(r10 -8) = r2 +- 2: (bf) r2 = r10 +- 3: (07) r2 += -8 +- 4: (b7) r3 = 4 +- 5: (b7) r4 = 0 +- 6: (b7) r5 = 0 +- 7: (85) call bpf_sk_lookup_tcp#65 +- 8: (95) exit +- Unreleased reference id=1, alloc_insn=7 +- + Testing + ------- + diff --git a/patches.suse/bpf-docs-Split-the-comparism-to-classic-BPF-from-ins.patch b/patches.suse/bpf-docs-Split-the-comparism-to-classic-BPF-from-ins.patch new file mode 100644 index 0000000..b02661a --- /dev/null +++ b/patches.suse/bpf-docs-Split-the-comparism-to-classic-BPF-from-ins.patch @@ -0,0 +1,869 @@ +From: Christoph Hellwig +Date: Thu, 23 Dec 2021 11:19:04 +0100 +Subject: bpf, docs: Split the comparism to classic BPF from + instruction-set.rst +Patch-mainline: v5.17-rc1 +Git-commit: 41db511a3a1622aa97064a3447e878eeb1a594b7 +References: jsc#PED-1368 + +Split the introductory that explain eBPF vs classic BPF and how it maps +to hardware from the instruction set specification into a standalone +document. This duplicates a little bit of information but gives us a +useful reference for the eBPF instrution set that is not encumbered by +classic BPF. 
+ +Signed-off-by: Christoph Hellwig +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211223101906.977624-3-hch@lst.de +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/classic_vs_extended.rst | 376 +++++++++++++++++++++++++++++ + Documentation/bpf/index.rst | 1 + Documentation/bpf/instruction-set.rst | 380 +++++------------------------- + 3 files changed, 446 insertions(+), 311 deletions(-) + create mode 100644 Documentation/bpf/classic_vs_extended.rst + +--- /dev/null ++++ b/Documentation/bpf/classic_vs_extended.rst +@@ -0,0 +1,376 @@ ++ ++=================== ++Classic BPF vs eBPF ++=================== ++ ++eBPF is designed to be JITed with one to one mapping, which can also open up ++the possibility for GCC/LLVM compilers to generate optimized eBPF code through ++an eBPF backend that performs almost as fast as natively compiled code. ++ ++Some core changes of the eBPF format from classic BPF: ++ ++- Number of registers increase from 2 to 10: ++ ++ The old format had two registers A and X, and a hidden frame pointer. The ++ new layout extends this to be 10 internal registers and a read-only frame ++ pointer. Since 64-bit CPUs are passing arguments to functions via registers ++ the number of args from eBPF program to in-kernel function is restricted ++ to 5 and one register is used to accept return value from an in-kernel ++ function. Natively, x86_64 passes first 6 arguments in registers, aarch64/ ++ sparcv9/mips64 have 7 - 8 registers for arguments; x86_64 has 6 callee saved ++ registers, and aarch64/sparcv9/mips64 have 11 or more callee saved registers. ++ ++ Thus, all eBPF registers map one to one to HW registers on x86_64, aarch64, ++ etc, and eBPF calling convention maps directly to ABIs used by the kernel on ++ 64-bit architectures. ++ ++ On 32-bit architectures JIT may map programs that use only 32-bit arithmetic ++ and may let more complex programs to be interpreted. ++ ++ R0 - R5 are scratch registers and eBPF program needs spill/fill them if ++ necessary across calls. Note that there is only one eBPF program (== one ++ eBPF main routine) and it cannot call other eBPF functions, it can only ++ call predefined in-kernel functions, though. ++ ++- Register width increases from 32-bit to 64-bit: ++ ++ Still, the semantics of the original 32-bit ALU operations are preserved ++ via 32-bit subregisters. All eBPF registers are 64-bit with 32-bit lower ++ subregisters that zero-extend into 64-bit if they are being written to. ++ That behavior maps directly to x86_64 and arm64 subregister definition, but ++ makes other JITs more difficult. ++ ++ 32-bit architectures run 64-bit eBPF programs via interpreter. ++ Their JITs may convert BPF programs that only use 32-bit subregisters into ++ native instruction set and let the rest being interpreted. ++ ++ Operation is 64-bit, because on 64-bit architectures, pointers are also ++ 64-bit wide, and we want to pass 64-bit values in/out of kernel functions, ++ so 32-bit eBPF registers would otherwise require to define register-pair ++ ABI, thus, there won't be able to use a direct eBPF register to HW register ++ mapping and JIT would need to do combine/split/move operations for every ++ register in and out of the function, which is complex, bug prone and slow. ++ Another reason is the use of atomic 64-bit counters. 
++ ++- Conditional jt/jf targets replaced with jt/fall-through: ++ ++ While the original design has constructs such as ``if (cond) jump_true; ++ else jump_false;``, they are being replaced into alternative constructs like ++ ``if (cond) jump_true; /* else fall-through */``. ++ ++- Introduces bpf_call insn and register passing convention for zero overhead ++ calls from/to other kernel functions: ++ ++ Before an in-kernel function call, the eBPF program needs to ++ place function arguments into R1 to R5 registers to satisfy calling ++ convention, then the interpreter will take them from registers and pass ++ to in-kernel function. If R1 - R5 registers are mapped to CPU registers ++ that are used for argument passing on given architecture, the JIT compiler ++ doesn't need to emit extra moves. Function arguments will be in the correct ++ registers and BPF_CALL instruction will be JITed as single 'call' HW ++ instruction. This calling convention was picked to cover common call ++ situations without performance penalty. ++ ++ After an in-kernel function call, R1 - R5 are reset to unreadable and R0 has ++ a return value of the function. Since R6 - R9 are callee saved, their state ++ is preserved across the call. ++ ++ For example, consider three C functions:: ++ ++ u64 f1() { return (*_f2)(1); } ++ u64 f2(u64 a) { return f3(a + 1, a); } ++ u64 f3(u64 a, u64 b) { return a - b; } ++ ++ GCC can compile f1, f3 into x86_64:: ++ ++ f1: ++ movl $1, %edi ++ movq _f2(%rip), %rax ++ jmp *%rax ++ f3: ++ movq %rdi, %rax ++ subq %rsi, %rax ++ ret ++ ++ Function f2 in eBPF may look like:: ++ ++ f2: ++ bpf_mov R2, R1 ++ bpf_add R1, 1 ++ bpf_call f3 ++ bpf_exit ++ ++ If f2 is JITed and the pointer stored to ``_f2``. The calls f1 -> f2 -> f3 and ++ returns will be seamless. Without JIT, __bpf_prog_run() interpreter needs to ++ be used to call into f2. ++ ++ For practical reasons all eBPF programs have only one argument 'ctx' which is ++ already placed into R1 (e.g. on __bpf_prog_run() startup) and the programs ++ can call kernel functions with up to 5 arguments. Calls with 6 or more arguments ++ are currently not supported, but these restrictions can be lifted if necessary ++ in the future. ++ ++ On 64-bit architectures all register map to HW registers one to one. For ++ example, x86_64 JIT compiler can map them as ... ++ ++ :: ++ ++ R0 - rax ++ R1 - rdi ++ R2 - rsi ++ R3 - rdx ++ R4 - rcx ++ R5 - r8 ++ R6 - rbx ++ R7 - r13 ++ R8 - r14 ++ R9 - r15 ++ R10 - rbp ++ ++ ... since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing ++ and rbx, r12 - r15 are callee saved. 
++ ++ Then the following eBPF pseudo-program:: ++ ++ bpf_mov R6, R1 /* save ctx */ ++ bpf_mov R2, 2 ++ bpf_mov R3, 3 ++ bpf_mov R4, 4 ++ bpf_mov R5, 5 ++ bpf_call foo ++ bpf_mov R7, R0 /* save foo() return value */ ++ bpf_mov R1, R6 /* restore ctx for next call */ ++ bpf_mov R2, 6 ++ bpf_mov R3, 7 ++ bpf_mov R4, 8 ++ bpf_mov R5, 9 ++ bpf_call bar ++ bpf_add R0, R7 ++ bpf_exit ++ ++ After JIT to x86_64 may look like:: ++ ++ push %rbp ++ mov %rsp,%rbp ++ sub $0x228,%rsp ++ mov %rbx,-0x228(%rbp) ++ mov %r13,-0x220(%rbp) ++ mov %rdi,%rbx ++ mov $0x2,%esi ++ mov $0x3,%edx ++ mov $0x4,%ecx ++ mov $0x5,%r8d ++ callq foo ++ mov %rax,%r13 ++ mov %rbx,%rdi ++ mov $0x6,%esi ++ mov $0x7,%edx ++ mov $0x8,%ecx ++ mov $0x9,%r8d ++ callq bar ++ add %r13,%rax ++ mov -0x228(%rbp),%rbx ++ mov -0x220(%rbp),%r13 ++ leaveq ++ retq ++ ++ Which is in this example equivalent in C to:: ++ ++ u64 bpf_filter(u64 ctx) ++ { ++ return foo(ctx, 2, 3, 4, 5) + bar(ctx, 6, 7, 8, 9); ++ } ++ ++ In-kernel functions foo() and bar() with prototype: u64 (*)(u64 arg1, u64 ++ arg2, u64 arg3, u64 arg4, u64 arg5); will receive arguments in proper ++ registers and place their return value into ``%rax`` which is R0 in eBPF. ++ Prologue and epilogue are emitted by JIT and are implicit in the ++ interpreter. R0-R5 are scratch registers, so eBPF program needs to preserve ++ them across the calls as defined by calling convention. ++ ++ For example the following program is invalid:: ++ ++ bpf_mov R1, 1 ++ bpf_call foo ++ bpf_mov R0, R1 ++ bpf_exit ++ ++ After the call the registers R1-R5 contain junk values and cannot be read. ++ An in-kernel verifier.rst is used to validate eBPF programs. ++ ++Also in the new design, eBPF is limited to 4096 insns, which means that any ++program will terminate quickly and will only call a fixed number of kernel ++functions. Original BPF and eBPF are two operand instructions, ++which helps to do one-to-one mapping between eBPF insn and x86 insn during JIT. ++ ++The input context pointer for invoking the interpreter function is generic, ++its content is defined by a specific use case. For seccomp register R1 points ++to seccomp_data, for converted BPF filters R1 points to a skb. ++ ++A program, that is translated internally consists of the following elements:: ++ ++ op:16, jt:8, jf:8, k:32 ==> op:8, dst_reg:4, src_reg:4, off:16, imm:32 ++ ++So far 87 eBPF instructions were implemented. 8-bit 'op' opcode field ++has room for new instructions. Some of them may use 16/24/32 byte encoding. New ++instructions must be multiple of 8 bytes to preserve backward compatibility. ++ ++eBPF is a general purpose RISC instruction set. Not every register and ++every instruction are used during translation from original BPF to eBPF. ++For example, socket filters are not using ``exclusive add`` instruction, but ++tracing filters may do to maintain counters of events, for example. Register R9 ++is not used by socket filters either, but more complex filters may be running ++out of registers and would have to resort to spill/fill to stack. ++ ++eBPF can be used as a generic assembler for last step performance ++optimizations, socket filters and seccomp are using it as assembler. Tracing ++filters may use it as assembler to generate code from kernel. In kernel usage ++may not be bounded by security considerations, since generated eBPF code ++may be optimizing internal code path and not being exposed to the user space. ++Safety of eBPF can come from the verifier.rst. 
In such use cases as ++described, it may be used as safe instruction set. ++ ++Just like the original BPF, eBPF runs within a controlled environment, ++is deterministic and the kernel can easily prove that. The safety of the program ++can be determined in two steps: first step does depth-first-search to disallow ++loops and other CFG validation; second step starts from the first insn and ++descends all possible paths. It simulates execution of every insn and observes ++the state change of registers and stack. ++ ++opcode encoding ++=============== ++ ++eBPF is reusing most of the opcode encoding from classic to simplify conversion ++of classic BPF to eBPF. ++ ++For arithmetic and jump instructions the 8-bit 'code' field is divided into three ++parts:: ++ ++ +----------------+--------+--------------------+ ++ | 4 bits | 1 bit | 3 bits | ++ | operation code | source | instruction class | ++ +----------------+--------+--------------------+ ++ (MSB) (LSB) ++ ++Three LSB bits store instruction class which is one of: ++ ++ =================== =============== ++ Classic BPF classes eBPF classes ++ =================== =============== ++ BPF_LD 0x00 BPF_LD 0x00 ++ BPF_LDX 0x01 BPF_LDX 0x01 ++ BPF_ST 0x02 BPF_ST 0x02 ++ BPF_STX 0x03 BPF_STX 0x03 ++ BPF_ALU 0x04 BPF_ALU 0x04 ++ BPF_JMP 0x05 BPF_JMP 0x05 ++ BPF_RET 0x06 BPF_JMP32 0x06 ++ BPF_MISC 0x07 BPF_ALU64 0x07 ++ =================== =============== ++ ++The 4th bit encodes the source operand ... ++ ++ :: ++ ++ BPF_K 0x00 ++ BPF_X 0x08 ++ ++ * in classic BPF, this means:: ++ ++ BPF_SRC(code) == BPF_X - use register X as source operand ++ BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand ++ ++ * in eBPF, this means:: ++ ++ BPF_SRC(code) == BPF_X - use 'src_reg' register as source operand ++ BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand ++ ++... and four MSB bits store operation code. ++ ++If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of:: ++ ++ BPF_ADD 0x00 ++ BPF_SUB 0x10 ++ BPF_MUL 0x20 ++ BPF_DIV 0x30 ++ BPF_OR 0x40 ++ BPF_AND 0x50 ++ BPF_LSH 0x60 ++ BPF_RSH 0x70 ++ BPF_NEG 0x80 ++ BPF_MOD 0x90 ++ BPF_XOR 0xa0 ++ BPF_MOV 0xb0 /* eBPF only: mov reg to reg */ ++ BPF_ARSH 0xc0 /* eBPF only: sign extending shift right */ ++ BPF_END 0xd0 /* eBPF only: endianness conversion */ ++ ++If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of:: ++ ++ BPF_JA 0x00 /* BPF_JMP only */ ++ BPF_JEQ 0x10 ++ BPF_JGT 0x20 ++ BPF_JGE 0x30 ++ BPF_JSET 0x40 ++ BPF_JNE 0x50 /* eBPF only: jump != */ ++ BPF_JSGT 0x60 /* eBPF only: signed '>' */ ++ BPF_JSGE 0x70 /* eBPF only: signed '>=' */ ++ BPF_CALL 0x80 /* eBPF BPF_JMP only: function call */ ++ BPF_EXIT 0x90 /* eBPF BPF_JMP only: function return */ ++ BPF_JLT 0xa0 /* eBPF only: unsigned '<' */ ++ BPF_JLE 0xb0 /* eBPF only: unsigned '<=' */ ++ BPF_JSLT 0xc0 /* eBPF only: signed '<' */ ++ BPF_JSLE 0xd0 /* eBPF only: signed '<=' */ ++ ++So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF ++and eBPF. There are only two registers in classic BPF, so it means A += X. ++In eBPF it means dst_reg = (u32) dst_reg + (u32) src_reg; similarly, ++BPF_XOR | BPF_K | BPF_ALU means A ^= imm32 in classic BPF and analogous ++src_reg = (u32) src_reg ^ (u32) imm32 in eBPF. ++ ++Classic BPF is using BPF_MISC class to represent A = X and X = A moves. ++eBPF is using BPF_MOV | BPF_X | BPF_ALU code instead. 
Since there are no ++BPF_MISC operations in eBPF, the class 7 is used as BPF_ALU64 to mean ++exactly the same operations as BPF_ALU, but with 64-bit wide operands ++instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.: ++dst_reg = dst_reg + src_reg ++ ++Classic BPF wastes the whole BPF_RET class to represent a single ``ret`` ++operation. Classic BPF_RET | BPF_K means copy imm32 into return register ++and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT ++in eBPF means function exit only. The eBPF program needs to store return ++value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is used as ++BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide ++operands for the comparisons instead. ++ ++For load and store instructions the 8-bit 'code' field is divided as:: ++ ++ +--------+--------+-------------------+ ++ | 3 bits | 2 bits | 3 bits | ++ | mode | size | instruction class | ++ +--------+--------+-------------------+ ++ (MSB) (LSB) ++ ++Size modifier is one of ... ++ ++:: ++ ++ BPF_W 0x00 /* word */ ++ BPF_H 0x08 /* half word */ ++ BPF_B 0x10 /* byte */ ++ BPF_DW 0x18 /* eBPF only, double word */ ++ ++... which encodes size of load/store operation:: ++ ++ B - 1 byte ++ H - 2 byte ++ W - 4 byte ++ DW - 8 byte (eBPF only) ++ ++Mode modifier is one of:: ++ ++ BPF_IMM 0x00 /* used for 32-bit mov in classic BPF and 64-bit in eBPF */ ++ BPF_ABS 0x20 ++ BPF_IND 0x40 ++ BPF_MEM 0x60 ++ BPF_LEN 0x80 /* classic BPF only, reserved in eBPF */ ++ BPF_MSH 0xa0 /* classic BPF only, reserved in eBPF */ ++ BPF_ATOMIC 0xc0 /* eBPF only, atomic operations */ +--- a/Documentation/bpf/index.rst ++++ b/Documentation/bpf/index.rst +@@ -21,6 +21,7 @@ that goes into great technical depth abo + helpers + programs + maps ++ classic_vs_extended.rst + bpf_licensing + test_debug + other +--- a/Documentation/bpf/instruction-set.rst ++++ b/Documentation/bpf/instruction-set.rst +@@ -3,253 +3,27 @@ + eBPF Instruction Set + ==================== + +-eBPF is designed to be JITed with one to one mapping, which can also open up +-the possibility for GCC/LLVM compilers to generate optimized eBPF code through +-an eBPF backend that performs almost as fast as natively compiled code. +- +-Some core changes of the eBPF format from classic BPF: +- +-- Number of registers increase from 2 to 10: +- +- The old format had two registers A and X, and a hidden frame pointer. The +- new layout extends this to be 10 internal registers and a read-only frame +- pointer. Since 64-bit CPUs are passing arguments to functions via registers +- the number of args from eBPF program to in-kernel function is restricted +- to 5 and one register is used to accept return value from an in-kernel +- function. Natively, x86_64 passes first 6 arguments in registers, aarch64/ +- sparcv9/mips64 have 7 - 8 registers for arguments; x86_64 has 6 callee saved +- registers, and aarch64/sparcv9/mips64 have 11 or more callee saved registers. +- +- Therefore, eBPF calling convention is defined as: +- +- * R0 - return value from in-kernel function, and exit value for eBPF program +- * R1 - R5 - arguments from eBPF program to in-kernel function +- * R6 - R9 - callee saved registers that in-kernel function will preserve +- * R10 - read-only frame pointer to access stack +- +- Thus, all eBPF registers map one to one to HW registers on x86_64, aarch64, +- etc, and eBPF calling convention maps directly to ABIs used by the kernel on +- 64-bit architectures. 
+- +- On 32-bit architectures JIT may map programs that use only 32-bit arithmetic +- and may let more complex programs to be interpreted. +- +- R0 - R5 are scratch registers and eBPF program needs spill/fill them if +- necessary across calls. Note that there is only one eBPF program (== one +- eBPF main routine) and it cannot call other eBPF functions, it can only +- call predefined in-kernel functions, though. +- +-- Register width increases from 32-bit to 64-bit: +- +- Still, the semantics of the original 32-bit ALU operations are preserved +- via 32-bit subregisters. All eBPF registers are 64-bit with 32-bit lower +- subregisters that zero-extend into 64-bit if they are being written to. +- That behavior maps directly to x86_64 and arm64 subregister definition, but +- makes other JITs more difficult. +- +- 32-bit architectures run 64-bit eBPF programs via interpreter. +- Their JITs may convert BPF programs that only use 32-bit subregisters into +- native instruction set and let the rest being interpreted. +- +- Operation is 64-bit, because on 64-bit architectures, pointers are also +- 64-bit wide, and we want to pass 64-bit values in/out of kernel functions, +- so 32-bit eBPF registers would otherwise require to define register-pair +- ABI, thus, there won't be able to use a direct eBPF register to HW register +- mapping and JIT would need to do combine/split/move operations for every +- register in and out of the function, which is complex, bug prone and slow. +- Another reason is the use of atomic 64-bit counters. +- +-- Conditional jt/jf targets replaced with jt/fall-through: +- +- While the original design has constructs such as ``if (cond) jump_true; +- else jump_false;``, they are being replaced into alternative constructs like +- ``if (cond) jump_true; /* else fall-through */``. +- +-- Introduces bpf_call insn and register passing convention for zero overhead +- calls from/to other kernel functions: +- +- Before an in-kernel function call, the eBPF program needs to +- place function arguments into R1 to R5 registers to satisfy calling +- convention, then the interpreter will take them from registers and pass +- to in-kernel function. If R1 - R5 registers are mapped to CPU registers +- that are used for argument passing on given architecture, the JIT compiler +- doesn't need to emit extra moves. Function arguments will be in the correct +- registers and BPF_CALL instruction will be JITed as single 'call' HW +- instruction. This calling convention was picked to cover common call +- situations without performance penalty. +- +- After an in-kernel function call, R1 - R5 are reset to unreadable and R0 has +- a return value of the function. Since R6 - R9 are callee saved, their state +- is preserved across the call. +- +- For example, consider three C functions:: +- +- u64 f1() { return (*_f2)(1); } +- u64 f2(u64 a) { return f3(a + 1, a); } +- u64 f3(u64 a, u64 b) { return a - b; } +- +- GCC can compile f1, f3 into x86_64:: +- +- f1: +- movl $1, %edi +- movq _f2(%rip), %rax +- jmp *%rax +- f3: +- movq %rdi, %rax +- subq %rsi, %rax +- ret +- +- Function f2 in eBPF may look like:: +- +- f2: +- bpf_mov R2, R1 +- bpf_add R1, 1 +- bpf_call f3 +- bpf_exit +- +- If f2 is JITed and the pointer stored to ``_f2``. The calls f1 -> f2 -> f3 and +- returns will be seamless. Without JIT, __bpf_prog_run() interpreter needs to +- be used to call into f2. +- +- For practical reasons all eBPF programs have only one argument 'ctx' which is +- already placed into R1 (e.g. 
on __bpf_prog_run() startup) and the programs +- can call kernel functions with up to 5 arguments. Calls with 6 or more arguments +- are currently not supported, but these restrictions can be lifted if necessary +- in the future. +- +- On 64-bit architectures all register map to HW registers one to one. For +- example, x86_64 JIT compiler can map them as ... +- +- :: +- +- R0 - rax +- R1 - rdi +- R2 - rsi +- R3 - rdx +- R4 - rcx +- R5 - r8 +- R6 - rbx +- R7 - r13 +- R8 - r14 +- R9 - r15 +- R10 - rbp +- +- ... since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing +- and rbx, r12 - r15 are callee saved. +- +- Then the following eBPF pseudo-program:: +- +- bpf_mov R6, R1 /* save ctx */ +- bpf_mov R2, 2 +- bpf_mov R3, 3 +- bpf_mov R4, 4 +- bpf_mov R5, 5 +- bpf_call foo +- bpf_mov R7, R0 /* save foo() return value */ +- bpf_mov R1, R6 /* restore ctx for next call */ +- bpf_mov R2, 6 +- bpf_mov R3, 7 +- bpf_mov R4, 8 +- bpf_mov R5, 9 +- bpf_call bar +- bpf_add R0, R7 +- bpf_exit +- +- After JIT to x86_64 may look like:: +- +- push %rbp +- mov %rsp,%rbp +- sub $0x228,%rsp +- mov %rbx,-0x228(%rbp) +- mov %r13,-0x220(%rbp) +- mov %rdi,%rbx +- mov $0x2,%esi +- mov $0x3,%edx +- mov $0x4,%ecx +- mov $0x5,%r8d +- callq foo +- mov %rax,%r13 +- mov %rbx,%rdi +- mov $0x6,%esi +- mov $0x7,%edx +- mov $0x8,%ecx +- mov $0x9,%r8d +- callq bar +- add %r13,%rax +- mov -0x228(%rbp),%rbx +- mov -0x220(%rbp),%r13 +- leaveq +- retq +- +- Which is in this example equivalent in C to:: +- +- u64 bpf_filter(u64 ctx) +- { +- return foo(ctx, 2, 3, 4, 5) + bar(ctx, 6, 7, 8, 9); +- } +- +- In-kernel functions foo() and bar() with prototype: u64 (*)(u64 arg1, u64 +- arg2, u64 arg3, u64 arg4, u64 arg5); will receive arguments in proper +- registers and place their return value into ``%rax`` which is R0 in eBPF. +- Prologue and epilogue are emitted by JIT and are implicit in the +- interpreter. R0-R5 are scratch registers, so eBPF program needs to preserve +- them across the calls as defined by calling convention. +- +- For example the following program is invalid:: +- +- bpf_mov R1, 1 +- bpf_call foo +- bpf_mov R0, R1 +- bpf_exit +- +- After the call the registers R1-R5 contain junk values and cannot be read. +- An in-kernel verifier.rst is used to validate eBPF programs. +- +-Also in the new design, eBPF is limited to 4096 insns, which means that any +-program will terminate quickly and will only call a fixed number of kernel +-functions. Original BPF and eBPF are two operand instructions, +-which helps to do one-to-one mapping between eBPF insn and x86 insn during JIT. +- +-The input context pointer for invoking the interpreter function is generic, +-its content is defined by a specific use case. For seccomp register R1 points +-to seccomp_data, for converted BPF filters R1 points to a skb. +- +-A program, that is translated internally consists of the following elements:: +- +- op:16, jt:8, jf:8, k:32 ==> op:8, dst_reg:4, src_reg:4, off:16, imm:32 +- +-So far 87 eBPF instructions were implemented. 8-bit 'op' opcode field +-has room for new instructions. Some of them may use 16/24/32 byte encoding. New +-instructions must be multiple of 8 bytes to preserve backward compatibility. +- +-eBPF is a general purpose RISC instruction set. Not every register and +-every instruction are used during translation from original BPF to eBPF. +-For example, socket filters are not using ``exclusive add`` instruction, but +-tracing filters may do to maintain counters of events, for example. 
Register R9 +-is not used by socket filters either, but more complex filters may be running +-out of registers and would have to resort to spill/fill to stack. +- +-eBPF can be used as a generic assembler for last step performance +-optimizations, socket filters and seccomp are using it as assembler. Tracing +-filters may use it as assembler to generate code from kernel. In kernel usage +-may not be bounded by security considerations, since generated eBPF code +-may be optimizing internal code path and not being exposed to the user space. +-Safety of eBPF can come from the verifier.rst. In such use cases as +-described, it may be used as safe instruction set. +- +-Just like the original BPF, eBPF runs within a controlled environment, +-is deterministic and the kernel can easily prove that. The safety of the program +-can be determined in two steps: first step does depth-first-search to disallow +-loops and other CFG validation; second step starts from the first insn and +-descends all possible paths. It simulates execution of every insn and observes +-the state change of registers and stack. ++Registers and calling convention ++================================ ++ ++eBPF has 10 general purpose registers and a read-only frame pointer register, ++all of which are 64-bits wide. ++ ++The eBPF calling convention is defined as: ++ ++ * R0: return value from function calls, and exit value for eBPF programs ++ * R1 - R5: arguments for function calls ++ * R6 - R9: callee saved registers that function calls will preserve ++ * R10: read-only frame pointer to access stack ++ ++R0 - R5 are scratch registers and eBPF programs needs to spill/fill them if ++necessary across calls. + + eBPF opcode encoding + ==================== + +-eBPF is reusing most of the opcode encoding from classic to simplify conversion +-of classic BPF to eBPF. For arithmetic and jump instructions the 8-bit 'code' +-field is divided into three parts:: ++For arithmetic and jump instructions the 8-bit 'opcode' field is divided into ++three parts:: + + +----------------+--------+--------------------+ + | 4 bits | 1 bit | 3 bits | +@@ -259,39 +33,29 @@ field is divided into three parts:: + + Three LSB bits store instruction class which is one of: + +- =================== =============== +- Classic BPF classes eBPF classes +- =================== =============== +- BPF_LD 0x00 BPF_LD 0x00 +- BPF_LDX 0x01 BPF_LDX 0x01 +- BPF_ST 0x02 BPF_ST 0x02 +- BPF_STX 0x03 BPF_STX 0x03 +- BPF_ALU 0x04 BPF_ALU 0x04 +- BPF_JMP 0x05 BPF_JMP 0x05 +- BPF_RET 0x06 BPF_JMP32 0x06 +- BPF_MISC 0x07 BPF_ALU64 0x07 +- =================== =============== ++ ========= ===== ++ class value ++ ========= ===== ++ BPF_LD 0x00 ++ BPF_LDX 0x01 ++ BPF_ST 0x02 ++ BPF_STX 0x03 ++ BPF_ALU 0x04 ++ BPF_JMP 0x05 ++ BPF_JMP32 0x06 ++ BPF_ALU64 0x07 ++ ========= ===== + + When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ... + +- :: +- +- BPF_K 0x00 +- BPF_X 0x08 +- +- * in classic BPF, this means:: +- +- BPF_SRC(code) == BPF_X - use register X as source operand +- BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand +- +- * in eBPF, this means:: ++:: + +- BPF_SRC(code) == BPF_X - use 'src_reg' register as source operand +- BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand ++ BPF_K 0x00 /* use 32-bit immediate as source operand */ ++ BPF_X 0x08 /* use 'src_reg' register as source operand */ + + ... and four MSB bits store operation code. 
+ +-If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of:: ++If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 BPF_OP(code) is one of:: + + BPF_ADD 0x00 + BPF_SUB 0x10 +@@ -304,45 +68,43 @@ If BPF_CLASS(code) == BPF_ALU or BPF_ALU + BPF_NEG 0x80 + BPF_MOD 0x90 + BPF_XOR 0xa0 +- BPF_MOV 0xb0 /* eBPF only: mov reg to reg */ +- BPF_ARSH 0xc0 /* eBPF only: sign extending shift right */ +- BPF_END 0xd0 /* eBPF only: endianness conversion */ ++ BPF_MOV 0xb0 /* mov reg to reg */ ++ BPF_ARSH 0xc0 /* sign extending shift right */ ++ BPF_END 0xd0 /* endianness conversion */ + +-If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of:: ++If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 BPF_OP(code) is one of:: + + BPF_JA 0x00 /* BPF_JMP only */ + BPF_JEQ 0x10 + BPF_JGT 0x20 + BPF_JGE 0x30 + BPF_JSET 0x40 +- BPF_JNE 0x50 /* eBPF only: jump != */ +- BPF_JSGT 0x60 /* eBPF only: signed '>' */ +- BPF_JSGE 0x70 /* eBPF only: signed '>=' */ +- BPF_CALL 0x80 /* eBPF BPF_JMP only: function call */ +- BPF_EXIT 0x90 /* eBPF BPF_JMP only: function return */ +- BPF_JLT 0xa0 /* eBPF only: unsigned '<' */ +- BPF_JLE 0xb0 /* eBPF only: unsigned '<=' */ +- BPF_JSLT 0xc0 /* eBPF only: signed '<' */ +- BPF_JSLE 0xd0 /* eBPF only: signed '<=' */ +- +-So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF +-and eBPF. There are only two registers in classic BPF, so it means A += X. +-In eBPF it means dst_reg = (u32) dst_reg + (u32) src_reg; similarly, +-BPF_XOR | BPF_K | BPF_ALU means A ^= imm32 in classic BPF and analogous +-src_reg = (u32) src_reg ^ (u32) imm32 in eBPF. +- +-Classic BPF is using BPF_MISC class to represent A = X and X = A moves. +-eBPF is using BPF_MOV | BPF_X | BPF_ALU code instead. Since there are no +-BPF_MISC operations in eBPF, the class 7 is used as BPF_ALU64 to mean +-exactly the same operations as BPF_ALU, but with 64-bit wide operands +-instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.: +-dst_reg = dst_reg + src_reg +- +-Classic BPF wastes the whole BPF_RET class to represent a single ``ret`` +-operation. Classic BPF_RET | BPF_K means copy imm32 into return register +-and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT +-in eBPF means function exit only. The eBPF program needs to store return +-value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is used as ++ BPF_JNE 0x50 /* jump != */ ++ BPF_JSGT 0x60 /* signed '>' */ ++ BPF_JSGE 0x70 /* signed '>=' */ ++ BPF_CALL 0x80 /* function call */ ++ BPF_EXIT 0x90 /* function return */ ++ BPF_JLT 0xa0 /* unsigned '<' */ ++ BPF_JLE 0xb0 /* unsigned '<=' */ ++ BPF_JSLT 0xc0 /* signed '<' */ ++ BPF_JSLE 0xd0 /* signed '<=' */ ++ ++So BPF_ADD | BPF_X | BPF_ALU means:: ++ ++ dst_reg = (u32) dst_reg + (u32) src_reg; ++ ++Similarly, BPF_XOR | BPF_K | BPF_ALU means:: ++ ++ src_reg = (u32) src_reg ^ (u32) imm32 ++ ++eBPF is using BPF_MOV | BPF_X | BPF_ALU to represent A = B moves. BPF_ALU64 ++is used to mean exactly the same operations as BPF_ALU, but with 64-bit wide ++operands instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.:: ++ ++ dst_reg = dst_reg + src_reg ++ ++BPF_JMP | BPF_EXIT means function exit only. The eBPF program needs to store ++the return value into register R0 before doing a BPF_EXIT. Class 6 is used as + BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide + operands for the comparisons instead. + +@@ -361,29 +123,27 @@ Size modifier is one of ... 
+ BPF_W 0x00 /* word */ + BPF_H 0x08 /* half word */ + BPF_B 0x10 /* byte */ +- BPF_DW 0x18 /* eBPF only, double word */ ++ BPF_DW 0x18 /* double word */ + + ... which encodes size of load/store operation:: + + B - 1 byte + H - 2 byte + W - 4 byte +- DW - 8 byte (eBPF only) ++ DW - 8 byte + + Mode modifier is one of:: + +- BPF_IMM 0x00 /* used for 32-bit mov in classic BPF and 64-bit in eBPF */ ++ BPF_IMM 0x00 /* used for 64-bit mov */ + BPF_ABS 0x20 + BPF_IND 0x40 + BPF_MEM 0x60 +- BPF_LEN 0x80 /* classic BPF only, reserved in eBPF */ +- BPF_MSH 0xa0 /* classic BPF only, reserved in eBPF */ +- BPF_ATOMIC 0xc0 /* eBPF only, atomic operations */ ++ BPF_ATOMIC 0xc0 /* atomic operations */ + + eBPF has two non-generic instructions: (BPF_ABS | | BPF_LD) and + (BPF_IND | | BPF_LD) which are used to access packet data. + +-They had to be carried over from classic to have strong performance of ++They had to be carried over from classic BPF to have strong performance of + socket filters running in eBPF interpreter. These instructions can only + be used when interpreter context is a pointer to ``struct sk_buff`` and + have seven implicit operands. Register R6 is an implicit input that must +@@ -405,7 +165,7 @@ For example:: + R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32)) + and R1 - R5 were scratched. + +-Unlike classic BPF instruction set, eBPF has generic load/store operations:: ++eBPF has generic load/store operations:: + + BPF_MEM | | BPF_STX: *(size *) (dst_reg + off) = src_reg + BPF_MEM | | BPF_ST: *(size *) (dst_reg + off) = imm32 +@@ -460,5 +220,3 @@ zero. + eBPF has one 16-byte instruction: ``BPF_LD | BPF_DW | BPF_IMM`` which consists + of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single + instruction that loads 64-bit immediate value into a dst_reg. +-Classic BPF has similar instruction: ``BPF_LD | BPF_W | BPF_IMM`` which loads +-32-bit immediate value into a register. diff --git a/patches.suse/bpf-mips-Fix-build-errors-about-__NR_bpf-undeclared.patch b/patches.suse/bpf-mips-Fix-build-errors-about-__NR_bpf-undeclared.patch new file mode 100644 index 0000000..1f21563 --- /dev/null +++ b/patches.suse/bpf-mips-Fix-build-errors-about-__NR_bpf-undeclared.patch @@ -0,0 +1,118 @@ +From: Tiezhu Yang +Date: Thu, 25 Nov 2021 09:36:07 +0800 +Subject: bpf, mips: Fix build errors about __NR_bpf undeclared +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 +Git-commit: e32cb12ff52a2840fc1248998717f7b95c42f064 +References: jsc#PED-1368 + +Add the __NR_bpf definitions to fix the following build errors for mips: + + $ cd tools/bpf/bpftool + $ make + [...] + bpf.c:54:4: error: #error __NR_bpf not defined. libbpf does not support your arch. + # error __NR_bpf not defined. libbpf does not support your arch. + ^~~~~ + bpf.c: In function ‘sys_bpf’: + bpf.c:66:17: error: ‘__NR_bpf’ undeclared (first use in this function); did you mean ‘__NR_brk’? + return syscall(__NR_bpf, cmd, attr, size); + ^~~~~~~~ + __NR_brk + [...] + In file included from gen_loader.c:15:0: + skel_internal.h: In function ‘skel_sys_bpf’: + skel_internal.h:53:17: error: ‘__NR_bpf’ undeclared (first use in this function); did you mean ‘__NR_brk’? 
+ return syscall(__NR_bpf, cmd, attr, size); + ^~~~~~~~ + __NR_brk + +We can see the following generated definitions: + + $ grep -r "#define __NR_bpf" arch/mips + arch/mips/include/generated/uapi/asm/unistd_o32.h:#define __NR_bpf (__NR_Linux + 355) + arch/mips/include/generated/uapi/asm/unistd_n64.h:#define __NR_bpf (__NR_Linux + 315) + arch/mips/include/generated/uapi/asm/unistd_n32.h:#define __NR_bpf (__NR_Linux + 319) + +The __NR_Linux is defined in arch/mips/include/uapi/asm/unistd.h: + + $ grep -r "#define __NR_Linux" arch/mips + arch/mips/include/uapi/asm/unistd.h:#define __NR_Linux 4000 + arch/mips/include/uapi/asm/unistd.h:#define __NR_Linux 5000 + arch/mips/include/uapi/asm/unistd.h:#define __NR_Linux 6000 + +That is to say, __NR_bpf is: + + 4000 + 355 = 4355 for mips o32, + 6000 + 319 = 6319 for mips n32, + 5000 + 315 = 5315 for mips n64. + +So use the GCC pre-defined macro _ABIO32, _ABIN32 and _ABI64 [1] to define +the corresponding __NR_bpf. + +This patch is similar with commit bad1926dd2f6 ("bpf, s390: fix build for +libbpf and selftest suite"). + + [1] https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=gcc/config/mips/mips.h#l549 + +Signed-off-by: Tiezhu Yang +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/1637804167-8323-1-git-send-email-yangtiezhu@loongson.cn +Acked-by: Shung-Hsi Yu +--- + tools/build/feature/test-bpf.c | 6 ++++++ + tools/lib/bpf/bpf.c | 6 ++++++ + tools/lib/bpf/skel_internal.h | 10 ++++++++++ + 3 files changed, 22 insertions(+) + +--- a/tools/build/feature/test-bpf.c ++++ b/tools/build/feature/test-bpf.c +@@ -14,6 +14,12 @@ + # define __NR_bpf 349 + # elif defined(__s390__) + # define __NR_bpf 351 ++# elif defined(__mips__) && defined(_ABIO32) ++# define __NR_bpf 4355 ++# elif defined(__mips__) && defined(_ABIN32) ++# define __NR_bpf 6319 ++# elif defined(__mips__) && defined(_ABI64) ++# define __NR_bpf 5315 + # else + # error __NR_bpf not defined. libbpf does not support your arch. + # endif +--- a/tools/lib/bpf/bpf.c ++++ b/tools/lib/bpf/bpf.c +@@ -50,6 +50,12 @@ + # define __NR_bpf 351 + # elif defined(__arc__) + # define __NR_bpf 280 ++# elif defined(__mips__) && defined(_ABIO32) ++# define __NR_bpf 4355 ++# elif defined(__mips__) && defined(_ABIN32) ++# define __NR_bpf 6319 ++# elif defined(__mips__) && defined(_ABI64) ++# define __NR_bpf 5315 + # else + # error __NR_bpf not defined. libbpf does not support your arch. + # endif +--- a/tools/lib/bpf/skel_internal.h ++++ b/tools/lib/bpf/skel_internal.h +@@ -7,6 +7,16 @@ + #include + #include + ++#ifndef __NR_bpf ++# if defined(__mips__) && defined(_ABIO32) ++# define __NR_bpf 4355 ++# elif defined(__mips__) && defined(_ABIN32) ++# define __NR_bpf 6319 ++# elif defined(__mips__) && defined(_ABI64) ++# define __NR_bpf 5315 ++# endif ++#endif ++ + /* This file is a base header for auto-generated *.lskel.h files. + * Its contents will change and may become part of auto-generation in the future. 
+ * diff --git a/patches.suse/bpf-powerpc-Remove-extra_pass-from-bpf_jit_build_bod.patch b/patches.suse/bpf-powerpc-Remove-extra_pass-from-bpf_jit_build_bod.patch new file mode 100644 index 0000000..0b7684b --- /dev/null +++ b/patches.suse/bpf-powerpc-Remove-extra_pass-from-bpf_jit_build_bod.patch @@ -0,0 +1,102 @@ +From: Ravi Bangoria +Date: Tue, 12 Oct 2021 18:00:50 +0530 +Subject: bpf powerpc: Remove extra_pass from bpf_jit_build_body() +Patch-mainline: v5.17-rc1 +Git-commit: 04c04205bc35d0ecdc57146995ca9eb957d4f379 +References: jsc#PED-1368 + +In case of extra_pass, usual JIT passes are always skipped. So, +extra_pass is always false while calling bpf_jit_build_body() and +can be removed. + +Signed-off-by: Ravi Bangoria +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20211012123056.485795-3-hbathini@linux.ibm.com +Acked-by: Shung-Hsi Yu +--- + arch/powerpc/net/bpf_jit.h | 2 +- + arch/powerpc/net/bpf_jit_comp.c | 6 +++--- + arch/powerpc/net/bpf_jit_comp32.c | 4 ++-- + arch/powerpc/net/bpf_jit_comp64.c | 4 ++-- + 4 files changed, 8 insertions(+), 8 deletions(-) + +--- a/arch/powerpc/net/bpf_jit.h ++++ b/arch/powerpc/net/bpf_jit.h +@@ -175,7 +175,7 @@ static inline void bpf_clear_seen_regist + + void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); + int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, +- u32 *addrs, bool extra_pass); ++ u32 *addrs); + void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); + void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); + void bpf_jit_realloc_regs(struct codegen_context *ctx); +--- a/arch/powerpc/net/bpf_jit_comp.c ++++ b/arch/powerpc/net/bpf_jit_comp.c +@@ -149,7 +149,7 @@ struct bpf_prog *bpf_int_jit_compile(str + cgctx.stack_size = round_up(fp->aux->stack_depth, 16); + + /* Scouting faux-generate pass 0 */ +- if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { ++ if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) { + /* We hit something illegal or unsupported. */ + fp = org_fp; + goto out_addrs; +@@ -162,7 +162,7 @@ struct bpf_prog *bpf_int_jit_compile(str + */ + if (cgctx.seen & SEEN_TAILCALL) { + cgctx.idx = 0; +- if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { ++ if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) { + fp = org_fp; + goto out_addrs; + } +@@ -210,7 +210,7 @@ skip_init_ctx: + /* Now build the prologue, body code & epilogue for real. 
*/ + cgctx.idx = 0; + bpf_jit_build_prologue(code_base, &cgctx); +- if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass)) { ++ if (bpf_jit_build_body(fp, code_base, &cgctx, addrs)) { + bpf_jit_binary_free(bpf_hdr); + fp = org_fp; + goto out_addrs; +--- a/arch/powerpc/net/bpf_jit_comp32.c ++++ b/arch/powerpc/net/bpf_jit_comp32.c +@@ -268,7 +268,7 @@ static int bpf_jit_emit_tail_call(u32 *i + + /* Assemble the body code between the prologue & epilogue */ + int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, +- u32 *addrs, bool extra_pass) ++ u32 *addrs) + { + const struct bpf_insn *insn = fp->insnsi; + int flen = fp->len; +@@ -862,7 +862,7 @@ int bpf_jit_build_body(struct bpf_prog * + case BPF_JMP | BPF_CALL: + ctx->seen |= SEEN_FUNC; + +- ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, ++ ret = bpf_jit_get_func_addr(fp, &insn[i], false, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; +--- a/arch/powerpc/net/bpf_jit_comp64.c ++++ b/arch/powerpc/net/bpf_jit_comp64.c +@@ -297,7 +297,7 @@ asm ( + + /* Assemble the body code between the prologue & epilogue */ + int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, +- u32 *addrs, bool extra_pass) ++ u32 *addrs) + { + enum stf_barrier_type stf_barrier = stf_barrier_type_get(); + const struct bpf_insn *insn = fp->insnsi; +@@ -831,7 +831,7 @@ emit_clear: + case BPF_JMP | BPF_CALL: + ctx->seen |= SEEN_FUNC; + +- ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, ++ ret = bpf_jit_get_func_addr(fp, &insn[i], false, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; diff --git a/patches.suse/bpf-powerpc-Remove-unused-SEEN_STACK.patch b/patches.suse/bpf-powerpc-Remove-unused-SEEN_STACK.patch new file mode 100644 index 0000000..ab37390 --- /dev/null +++ b/patches.suse/bpf-powerpc-Remove-unused-SEEN_STACK.patch @@ -0,0 +1,31 @@ +From: Ravi Bangoria +Date: Tue, 12 Oct 2021 18:00:49 +0530 +Subject: bpf powerpc: Remove unused SEEN_STACK +Patch-mainline: v5.17-rc1 +Git-commit: c9ce7c36e4870bd307101ba7a00a39d9aad270f3 +References: jsc#PED-1368 + +SEEN_STACK is unused on PowerPC. Remove it. Also, have +SEEN_TAILCALL use 0x40000000. 
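For context, the ``seen`` bits are set while the JIT scans the program and are
tested afterwards to steer code generation; the following sketch is assembled
from hunks shown elsewhere in this series for illustration (not part of this
patch)::

  /* while JITing the body, helper calls mark the context: */
  case BPF_JMP | BPF_CALL:
          ctx->seen |= SEEN_FUNC;

  /* after the scouting pass, the flags steer code generation: */
  if (cgctx.seen & SEEN_TAILCALL) {
          /* redo the pass with a tail-call-capable prologue */
  }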
+ +Signed-off-by: Ravi Bangoria +Reviewed-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20211012123056.485795-2-hbathini@linux.ibm.com +Acked-by: Shung-Hsi Yu +--- + arch/powerpc/net/bpf_jit.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/arch/powerpc/net/bpf_jit.h ++++ b/arch/powerpc/net/bpf_jit.h +@@ -125,8 +125,7 @@ + #define COND_LE (CR0_GT | COND_CMP_FALSE) + + #define SEEN_FUNC 0x20000000 /* might call external helpers */ +-#define SEEN_STACK 0x40000000 /* uses BPF stack */ +-#define SEEN_TAILCALL 0x80000000 /* uses tail calls */ ++#define SEEN_TAILCALL 0x40000000 /* uses tail calls */ + + #define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ + #define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */ diff --git a/patches.suse/bpf-powerpc-refactor-JIT-compiler-code.patch b/patches.suse/bpf-powerpc-refactor-JIT-compiler-code.patch new file mode 100644 index 0000000..966e659 --- /dev/null +++ b/patches.suse/bpf-powerpc-refactor-JIT-compiler-code.patch @@ -0,0 +1,125 @@ +From: Hari Bathini +Date: Tue, 12 Oct 2021 18:00:51 +0530 +Subject: bpf powerpc: refactor JIT compiler code +Patch-mainline: v5.17-rc1 +Git-commit: efa95f031bf38c85cf865413335a3dc044e3194e +References: jsc#PED-1368 + +Refactor powerpc LDX JITing code to simplify adding BPF_PROBE_MEM +support. + +Signed-off-by: Hari Bathini +Reviewed-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20211012123056.485795-4-hbathini@linux.ibm.com +Acked-by: Shung-Hsi Yu +--- + arch/powerpc/net/bpf_jit_comp32.c | 33 +++++++++++++++++++-------------- + arch/powerpc/net/bpf_jit_comp64.c | 31 ++++++++++++++++++------------- + 2 files changed, 37 insertions(+), 27 deletions(-) + +--- a/arch/powerpc/net/bpf_jit_comp32.c ++++ b/arch/powerpc/net/bpf_jit_comp32.c +@@ -284,6 +284,7 @@ int bpf_jit_build_body(struct bpf_prog * + u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg); + u32 src_reg_h = src_reg - 1; + u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG); ++ u32 size = BPF_SIZE(code); + s16 off = insn[i].off; + s32 imm = insn[i].imm; + bool func_addr_fixed; +@@ -812,23 +813,27 @@ int bpf_jit_build_body(struct bpf_prog * + * BPF_LDX + */ + case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ +- EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); +- if (!fp->aux->verifier_zext) +- EMIT(PPC_RAW_LI(dst_reg_h, 0)); +- break; + case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ +- EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); +- if (!fp->aux->verifier_zext) +- EMIT(PPC_RAW_LI(dst_reg_h, 0)); +- break; + case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ +- EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); +- if (!fp->aux->verifier_zext) +- EMIT(PPC_RAW_LI(dst_reg_h, 0)); +- break; + case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ +- EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); +- EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); ++ switch (size) { ++ case BPF_B: ++ EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); ++ break; ++ case BPF_H: ++ EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); ++ break; ++ case BPF_W: ++ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); ++ break; ++ case BPF_DW: ++ EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); ++ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); ++ break; ++ } ++ ++ if (size != BPF_DW && !fp->aux->verifier_zext) ++ EMIT(PPC_RAW_LI(dst_reg_h, 0)); + break; + + /* +--- a/arch/powerpc/net/bpf_jit_comp64.c ++++ b/arch/powerpc/net/bpf_jit_comp64.c +@@ -311,6 +311,7 @@ int 
bpf_jit_build_body(struct bpf_prog * + u32 code = insn[i].code; + u32 dst_reg = b2p[insn[i].dst_reg]; + u32 src_reg = b2p[insn[i].src_reg]; ++ u32 size = BPF_SIZE(code); + s16 off = insn[i].off; + s32 imm = insn[i].imm; + bool func_addr_fixed; +@@ -778,25 +779,29 @@ emit_clear: + */ + /* dst = *(u8 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEM | BPF_B: +- EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); +- if (insn_is_zext(&insn[i + 1])) +- addrs[++i] = ctx->idx * 4; +- break; + /* dst = *(u16 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEM | BPF_H: +- EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); +- if (insn_is_zext(&insn[i + 1])) +- addrs[++i] = ctx->idx * 4; +- break; + /* dst = *(u32 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEM | BPF_W: +- EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); +- if (insn_is_zext(&insn[i + 1])) +- addrs[++i] = ctx->idx * 4; +- break; + /* dst = *(u64 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEM | BPF_DW: +- PPC_BPF_LL(dst_reg, src_reg, off); ++ switch (size) { ++ case BPF_B: ++ EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); ++ break; ++ case BPF_H: ++ EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); ++ break; ++ case BPF_W: ++ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); ++ break; ++ case BPF_DW: ++ PPC_BPF_LL(dst_reg, src_reg, off); ++ break; ++ } ++ ++ if (size != BPF_DW && insn_is_zext(&insn[i + 1])) ++ addrs[++i] = ctx->idx * 4; + break; + + /* diff --git a/patches.suse/bpf-ppc32-Access-only-if-addr-is-kernel-address.patch b/patches.suse/bpf-ppc32-Access-only-if-addr-is-kernel-address.patch new file mode 100644 index 0000000..8e1fb52 --- /dev/null +++ b/patches.suse/bpf-ppc32-Access-only-if-addr-is-kernel-address.patch @@ -0,0 +1,74 @@ +From: Hari Bathini +Date: Tue, 12 Oct 2021 18:00:56 +0530 +Subject: bpf ppc32: Access only if addr is kernel address +Patch-mainline: v5.17-rc1 +Git-commit: e919c0b2323bedec00e1ecc6280498ff81f59b15 +References: jsc#PED-1368 + +With KUAP enabled, any kernel code which wants to access userspace +needs to be surrounded by disable-enable KUAP. But that is not +happening for BPF_PROBE_MEM load instruction. Though PPC32 does not +support read protection, considering the fact that PTR_TO_BTF_ID +(which uses BPF_PROBE_MEM mode) could either be a valid kernel pointer +or NULL but should never be a pointer to userspace address, execute +BPF_PROBE_MEM load only if addr is kernel address, otherwise set +dst_reg=0 and move on. + +This will catch NULL, valid or invalid userspace pointers. Only bad +kernel pointer will be handled by BPF exception table. + +[Alexei suggested for x86] + +Suggested-by: Alexei Starovoitov +Signed-off-by: Hari Bathini +Reviewed-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20211012123056.485795-9-hbathini@linux.ibm.com +Acked-by: Shung-Hsi Yu +--- + arch/powerpc/net/bpf_jit_comp32.c | 34 ++++++++++++++++++++++++++++++++++ + 1 file changed, 34 insertions(+) + +--- a/arch/powerpc/net/bpf_jit_comp32.c ++++ b/arch/powerpc/net/bpf_jit_comp32.c +@@ -820,6 +820,40 @@ int bpf_jit_build_body(struct bpf_prog * + case BPF_LDX | BPF_PROBE_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: ++ /* ++ * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid ++ * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM ++ * load only if addr is kernel address (see is_kernel_addr()), otherwise ++ * set dst_reg=0 and move on. 
++ */ ++ if (BPF_MODE(code) == BPF_PROBE_MEM) { ++ PPC_LI32(_R0, TASK_SIZE - off); ++ EMIT(PPC_RAW_CMPLW(src_reg, _R0)); ++ PPC_BCC(COND_GT, (ctx->idx + 5) * 4); ++ EMIT(PPC_RAW_LI(dst_reg, 0)); ++ /* ++ * For BPF_DW case, "li reg_h,0" would be needed when ++ * !fp->aux->verifier_zext. Emit NOP otherwise. ++ * ++ * Note that "li reg_h,0" is emitted for BPF_B/H/W case, ++ * if necessary. So, jump there instead of emitting an ++ * additional "li reg_h,0" instruction. ++ */ ++ if (size == BPF_DW && !fp->aux->verifier_zext) ++ EMIT(PPC_RAW_LI(dst_reg_h, 0)); ++ else ++ EMIT(PPC_RAW_NOP()); ++ /* ++ * Need to jump two instructions instead of one for BPF_DW case ++ * as there are two load instructions for dst_reg_h & dst_reg ++ * respectively. ++ */ ++ if (size == BPF_DW) ++ PPC_JMP((ctx->idx + 3) * 4); ++ else ++ PPC_JMP((ctx->idx + 2) * 4); ++ } ++ + switch (size) { + case BPF_B: + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); diff --git a/patches.suse/bpf-ppc32-Add-BPF_PROBE_MEM-support-for-JIT.patch b/patches.suse/bpf-ppc32-Add-BPF_PROBE_MEM-support-for-JIT.patch new file mode 100644 index 0000000..84ef843 --- /dev/null +++ b/patches.suse/bpf-ppc32-Add-BPF_PROBE_MEM-support-for-JIT.patch @@ -0,0 +1,132 @@ +From: Hari Bathini +Date: Tue, 12 Oct 2021 18:00:55 +0530 +Subject: bpf ppc32: Add BPF_PROBE_MEM support for JIT +Patch-mainline: v5.17-rc1 +Git-commit: 23b51916ee129833453d8a3d6bde0ff392f82fce +References: jsc#PED-1368 + +BPF load instruction with BPF_PROBE_MEM mode can cause a fault +inside kernel. Append exception table for such instructions +within BPF program. + +Unlike other archs which uses extable 'fixup' field to pass dest_reg +and nip, BPF exception table on PowerPC follows the generic PowerPC +exception table design, where it populates both fixup and extable +sections within BPF program. fixup section contains 3 instructions, +first 2 instructions clear dest_reg (lower & higher 32-bit registers) +and last instruction jumps to next instruction in the BPF code. +extable 'insn' field contains relative offset of the instruction and +'fixup' field contains relative offset of the fixup entry. 
Example +layout of BPF program with extable present: + + +------------------+ + | | + | | + 0x4020 -->| lwz r28,4(r4) | + | | + | | + 0x40ac -->| lwz r3,0(r24) | + | lwz r4,4(r24) | + | | + | | + |------------------| + 0x4278 -->| li r28,0 | \ + | li r27,0 | | fixup entry + | b 0x4024 | / + 0x4284 -->| li r4,0 | + | li r3,0 | + | b 0x40b4 | + |------------------| + 0x4290 -->| insn=0xfffffd90 | \ extable entry + | fixup=0xffffffe4 | / + 0x4298 -->| insn=0xfffffe14 | + | fixup=0xffffffe8 | + +------------------+ + + (Addresses shown here are chosen random, not real) + +Signed-off-by: Hari Bathini +Reviewed-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20211012123056.485795-8-hbathini@linux.ibm.com +Acked-by: Shung-Hsi Yu +--- + arch/powerpc/net/bpf_jit.h | 4 ++++ + arch/powerpc/net/bpf_jit_comp.c | 2 ++ + arch/powerpc/net/bpf_jit_comp32.c | 30 ++++++++++++++++++++++++++++++ + 3 files changed, 36 insertions(+) + +--- a/arch/powerpc/net/bpf_jit.h ++++ b/arch/powerpc/net/bpf_jit.h +@@ -153,7 +153,11 @@ struct codegen_context { + unsigned int exentry_idx; + }; + ++#ifdef CONFIG_PPC32 ++#define BPF_FIXUP_LEN 3 /* Three instructions => 12 bytes */ ++#else + #define BPF_FIXUP_LEN 2 /* Two instructions => 8 bytes */ ++#endif + + static inline void bpf_flush_icache(void *start, void *end) + { +--- a/arch/powerpc/net/bpf_jit_comp.c ++++ b/arch/powerpc/net/bpf_jit_comp.c +@@ -297,6 +297,8 @@ int bpf_add_extable_entry(struct bpf_pro + (ctx->exentry_idx * BPF_FIXUP_LEN * 4); + + fixup[0] = PPC_RAW_LI(dst_reg, 0); ++ if (IS_ENABLED(CONFIG_PPC32)) ++ fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */ + + fixup[BPF_FIXUP_LEN - 1] = + PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]); +--- a/arch/powerpc/net/bpf_jit_comp32.c ++++ b/arch/powerpc/net/bpf_jit_comp32.c +@@ -813,9 +813,13 @@ int bpf_jit_build_body(struct bpf_prog * + * BPF_LDX + */ + case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ ++ case BPF_LDX | BPF_PROBE_MEM | BPF_B: + case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ ++ case BPF_LDX | BPF_PROBE_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ ++ case BPF_LDX | BPF_PROBE_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ ++ case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + switch (size) { + case BPF_B: + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); +@@ -834,6 +838,32 @@ int bpf_jit_build_body(struct bpf_prog * + + if (size != BPF_DW && !fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); ++ ++ if (BPF_MODE(code) == BPF_PROBE_MEM) { ++ int insn_idx = ctx->idx - 1; ++ int jmp_off = 4; ++ ++ /* ++ * In case of BPF_DW, two lwz instructions are emitted, one ++ * for higher 32-bit and another for lower 32-bit. So, set ++ * ex->insn to the first of the two and jump over both ++ * instructions in fixup. ++ * ++ * Similarly, with !verifier_zext, two instructions are ++ * emitted for BPF_B/H/W case. So, set ex->insn to the ++ * instruction that could fault and skip over both ++ * instructions. 
++ */ ++ if (size == BPF_DW || !fp->aux->verifier_zext) { ++ insn_idx -= 1; ++ jmp_off += 4; ++ } ++ ++ ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx, ++ jmp_off, dst_reg); ++ if (ret) ++ return ret; ++ } + break; + + /* diff --git a/patches.suse/bpf-ppc64-Access-only-if-addr-is-kernel-address.patch b/patches.suse/bpf-ppc64-Access-only-if-addr-is-kernel-address.patch new file mode 100644 index 0000000..b314655 --- /dev/null +++ b/patches.suse/bpf-ppc64-Access-only-if-addr-is-kernel-address.patch @@ -0,0 +1,71 @@ +From: Ravi Bangoria +Date: Tue, 12 Oct 2021 18:00:54 +0530 +Subject: bpf ppc64: Access only if addr is kernel address +Patch-mainline: v5.17-rc1 +Git-commit: 9c70c7147ffec31de67d33243570a533b29f9759 +References: jsc#PED-1368 + +On PPC64 with KUAP enabled, any kernel code which wants to +access userspace needs to be surrounded by disable-enable KUAP. +But that is not happening for BPF_PROBE_MEM load instruction. +So, when BPF program tries to access invalid userspace address, +page-fault handler considers it as bad KUAP fault: + + Kernel attempted to read user page (d0000000) - exploit attempt? (uid: 0) + +Considering the fact that PTR_TO_BTF_ID (which uses BPF_PROBE_MEM +mode) could either be a valid kernel pointer or NULL but should +never be a pointer to userspace address, execute BPF_PROBE_MEM load +only if addr is kernel address, otherwise set dst_reg=0 and move on. + +This will catch NULL, valid or invalid userspace pointers. Only bad +kernel pointer will be handled by BPF exception table. + +[Alexei suggested for x86] + +Suggested-by: Alexei Starovoitov +Signed-off-by: Ravi Bangoria +Signed-off-by: Hari Bathini +Reviewed-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20211012123056.485795-7-hbathini@linux.ibm.com +Acked-by: Shung-Hsi Yu +--- + arch/powerpc/net/bpf_jit_comp64.c | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) + +--- a/arch/powerpc/net/bpf_jit_comp64.c ++++ b/arch/powerpc/net/bpf_jit_comp64.c +@@ -789,6 +789,32 @@ emit_clear: + /* dst = *(u64 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: ++ /* ++ * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid ++ * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM ++ * load only if addr is kernel address (see is_kernel_addr()), otherwise ++ * set dst_reg=0 and move on. ++ */ ++ if (BPF_MODE(code) == BPF_PROBE_MEM) { ++ EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], src_reg, off)); ++ if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) ++ PPC_LI64(b2p[TMP_REG_2], 0x8000000000000000ul); ++ else /* BOOK3S_64 */ ++ PPC_LI64(b2p[TMP_REG_2], PAGE_OFFSET); ++ EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], b2p[TMP_REG_2])); ++ PPC_BCC(COND_GT, (ctx->idx + 4) * 4); ++ EMIT(PPC_RAW_LI(dst_reg, 0)); ++ /* ++ * Check if 'off' is word aligned because PPC_BPF_LL() ++ * (BPF_DW case) generates two instructions if 'off' is not ++ * word-aligned and one instruction otherwise. 
++ */ ++ if (BPF_SIZE(code) == BPF_DW && (off & 3)) ++ PPC_JMP((ctx->idx + 3) * 4); ++ else ++ PPC_JMP((ctx->idx + 2) * 4); ++ } ++ + switch (size) { + case BPF_B: + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); diff --git a/patches.suse/bpf-ppc64-Add-BPF_PROBE_MEM-support-for-JIT.patch b/patches.suse/bpf-ppc64-Add-BPF_PROBE_MEM-support-for-JIT.patch new file mode 100644 index 0000000..3722b33 --- /dev/null +++ b/patches.suse/bpf-ppc64-Add-BPF_PROBE_MEM-support-for-JIT.patch @@ -0,0 +1,268 @@ +From: Ravi Bangoria +Date: Tue, 12 Oct 2021 18:00:53 +0530 +Subject: bpf ppc64: Add BPF_PROBE_MEM support for JIT +Patch-mainline: v5.17-rc1 +Git-commit: 983bdc0245a29cdefcd30d9d484d3edbc4b6d787 +References: jsc#PED-1368 + +BPF load instruction with BPF_PROBE_MEM mode can cause a fault +inside kernel. Append exception table for such instructions +within BPF program. + +Unlike other archs which uses extable 'fixup' field to pass dest_reg +and nip, BPF exception table on PowerPC follows the generic PowerPC +exception table design, where it populates both fixup and extable +sections within BPF program. fixup section contains two instructions, +first instruction clears dest_reg and 2nd jumps to next instruction +in the BPF code. extable 'insn' field contains relative offset of +the instruction and 'fixup' field contains relative offset of the +fixup entry. Example layout of BPF program with extable present: + + +------------------+ + | | + | | + 0x4020 -->| ld r27,4(r3) | + | | + | | + 0x40ac -->| lwz r3,0(r4) | + | | + | | + |------------------| + 0x4280 -->| li r27,0 | \ fixup entry + | b 0x4024 | / + 0x4288 -->| li r3,0 | + | b 0x40b0 | + |------------------| + 0x4290 -->| insn=0xfffffd90 | \ extable entry + | fixup=0xffffffec | / + 0x4298 -->| insn=0xfffffe14 | + | fixup=0xffffffec | + +------------------+ + + (Addresses shown here are chosen random, not real) + +Signed-off-by: Ravi Bangoria +Signed-off-by: Hari Bathini +Reviewed-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20211012123056.485795-6-hbathini@linux.ibm.com +Acked-by: Shung-Hsi Yu +--- + arch/powerpc/net/bpf_jit.h | 8 ++++ + arch/powerpc/net/bpf_jit_comp.c | 66 ++++++++++++++++++++++++++++++++++---- + arch/powerpc/net/bpf_jit_comp32.c | 2 - + arch/powerpc/net/bpf_jit_comp64.c | 13 ++++++- + 4 files changed, 80 insertions(+), 9 deletions(-) + +--- a/arch/powerpc/net/bpf_jit.h ++++ b/arch/powerpc/net/bpf_jit.h +@@ -150,8 +150,11 @@ struct codegen_context { + unsigned int idx; + unsigned int stack_size; + int b2p[ARRAY_SIZE(b2p)]; ++ unsigned int exentry_idx; + }; + ++#define BPF_FIXUP_LEN 2 /* Two instructions => 8 bytes */ ++ + static inline void bpf_flush_icache(void *start, void *end) + { + smp_wmb(); /* smp write barrier */ +@@ -175,11 +178,14 @@ static inline void bpf_clear_seen_regist + + void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); + int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, +- u32 *addrs); ++ u32 *addrs, int pass); + void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); + void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); + void bpf_jit_realloc_regs(struct codegen_context *ctx); + ++int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, ++ int insn_idx, int jmp_off, int dst_reg); ++ + #endif + + #endif +--- a/arch/powerpc/net/bpf_jit_comp.c ++++ b/arch/powerpc/net/bpf_jit_comp.c +@@ -101,6 +101,8 @@ struct bpf_prog 
*bpf_int_jit_compile(str + struct bpf_prog *tmp_fp; + bool bpf_blinded = false; + bool extra_pass = false; ++ u32 extable_len; ++ u32 fixup_len; + + if (!fp->jit_requested) + return org_fp; +@@ -131,7 +133,6 @@ struct bpf_prog *bpf_int_jit_compile(str + image = jit_data->image; + bpf_hdr = jit_data->header; + proglen = jit_data->proglen; +- alloclen = proglen + FUNCTION_DESCR_SIZE; + extra_pass = true; + goto skip_init_ctx; + } +@@ -149,7 +150,7 @@ struct bpf_prog *bpf_int_jit_compile(str + cgctx.stack_size = round_up(fp->aux->stack_depth, 16); + + /* Scouting faux-generate pass 0 */ +- if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) { ++ if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { + /* We hit something illegal or unsupported. */ + fp = org_fp; + goto out_addrs; +@@ -162,7 +163,7 @@ struct bpf_prog *bpf_int_jit_compile(str + */ + if (cgctx.seen & SEEN_TAILCALL) { + cgctx.idx = 0; +- if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) { ++ if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { + fp = org_fp; + goto out_addrs; + } +@@ -177,8 +178,11 @@ struct bpf_prog *bpf_int_jit_compile(str + bpf_jit_build_prologue(0, &cgctx); + bpf_jit_build_epilogue(0, &cgctx); + ++ fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4; ++ extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry); ++ + proglen = cgctx.idx * 4; +- alloclen = proglen + FUNCTION_DESCR_SIZE; ++ alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len; + + bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns); + if (!bpf_hdr) { +@@ -186,6 +190,9 @@ struct bpf_prog *bpf_int_jit_compile(str + goto out_addrs; + } + ++ if (extable_len) ++ fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len; ++ + skip_init_ctx: + code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); + +@@ -210,7 +217,7 @@ skip_init_ctx: + /* Now build the prologue, body code & epilogue for real. */ + cgctx.idx = 0; + bpf_jit_build_prologue(code_base, &cgctx); +- if (bpf_jit_build_body(fp, code_base, &cgctx, addrs)) { ++ if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass)) { + bpf_jit_binary_free(bpf_hdr); + fp = org_fp; + goto out_addrs; +@@ -238,7 +245,7 @@ skip_codegen_passes: + + fp->bpf_func = (void *)image; + fp->jited = 1; +- fp->jited_len = alloclen; ++ fp->jited_len = proglen + FUNCTION_DESCR_SIZE; + + bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); + if (!fp->is_func || extra_pass) { +@@ -262,3 +269,50 @@ out: + + return fp; + } ++ ++/* ++ * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling ++ * this function, as this only applies to BPF_PROBE_MEM, for now. 
++ */ ++int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, ++ int insn_idx, int jmp_off, int dst_reg) ++{ ++ off_t offset; ++ unsigned long pc; ++ struct exception_table_entry *ex; ++ u32 *fixup; ++ ++ /* Populate extable entries only in the last pass */ ++ if (pass != 2) ++ return 0; ++ ++ if (!fp->aux->extable || ++ WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries)) ++ return -EINVAL; ++ ++ pc = (unsigned long)&image[insn_idx]; ++ ++ fixup = (void *)fp->aux->extable - ++ (fp->aux->num_exentries * BPF_FIXUP_LEN * 4) + ++ (ctx->exentry_idx * BPF_FIXUP_LEN * 4); ++ ++ fixup[0] = PPC_RAW_LI(dst_reg, 0); ++ ++ fixup[BPF_FIXUP_LEN - 1] = ++ PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]); ++ ++ ex = &fp->aux->extable[ctx->exentry_idx]; ++ ++ offset = pc - (long)&ex->insn; ++ if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) ++ return -ERANGE; ++ ex->insn = offset; ++ ++ offset = (long)fixup - (long)&ex->fixup; ++ if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) ++ return -ERANGE; ++ ex->fixup = offset; ++ ++ ctx->exentry_idx++; ++ return 0; ++} +--- a/arch/powerpc/net/bpf_jit_comp32.c ++++ b/arch/powerpc/net/bpf_jit_comp32.c +@@ -268,7 +268,7 @@ static int bpf_jit_emit_tail_call(u32 *i + + /* Assemble the body code between the prologue & epilogue */ + int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, +- u32 *addrs) ++ u32 *addrs, int pass) + { + const struct bpf_insn *insn = fp->insnsi; + int flen = fp->len; +--- a/arch/powerpc/net/bpf_jit_comp64.c ++++ b/arch/powerpc/net/bpf_jit_comp64.c +@@ -297,7 +297,7 @@ asm ( + + /* Assemble the body code between the prologue & epilogue */ + int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, +- u32 *addrs) ++ u32 *addrs, int pass) + { + enum stf_barrier_type stf_barrier = stf_barrier_type_get(); + const struct bpf_insn *insn = fp->insnsi; +@@ -779,12 +779,16 @@ emit_clear: + */ + /* dst = *(u8 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEM | BPF_B: ++ case BPF_LDX | BPF_PROBE_MEM | BPF_B: + /* dst = *(u16 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEM | BPF_H: ++ case BPF_LDX | BPF_PROBE_MEM | BPF_H: + /* dst = *(u32 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEM | BPF_W: ++ case BPF_LDX | BPF_PROBE_MEM | BPF_W: + /* dst = *(u64 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEM | BPF_DW: ++ case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + switch (size) { + case BPF_B: + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); +@@ -802,6 +806,13 @@ emit_clear: + + if (size != BPF_DW && insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; ++ ++ if (BPF_MODE(code) == BPF_PROBE_MEM) { ++ ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1, ++ 4, dst_reg); ++ if (ret) ++ return ret; ++ } + break; + + /* diff --git a/patches.suse/bpf-selftests-Add-bind-retry-for-post_bind-4-6.patch b/patches.suse/bpf-selftests-Add-bind-retry-for-post_bind-4-6.patch new file mode 100644 index 0000000..2760c3c --- /dev/null +++ b/patches.suse/bpf-selftests-Add-bind-retry-for-post_bind-4-6.patch @@ -0,0 +1,231 @@ +From: Menglong Dong +Date: Thu, 6 Jan 2022 21:20:22 +0800 +Subject: bpf: selftests: Add bind retry for post_bind{4, 6} +Patch-mainline: v5.17-rc1 +Git-commit: f7342481749365d9ac5f24fb971659a64e045bb5 +References: jsc#PED-1368 + +With previous patch, kernel is able to 'put_port' after sys_bind() +fails. Add the test for that case: rebind another port after +sys_bind() fails. 
If the bind succeeds, it means the previous bind +operation has already been undone. + +Signed-off-by: Menglong Dong +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220106132022.3470772-4-imagedong@tencent.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/test_sock.c | 150 +++++++++++++++++++++++++++----- + 1 file changed, 130 insertions(+), 20 deletions(-) + +--- a/tools/testing/selftests/bpf/test_sock.c ++++ b/tools/testing/selftests/bpf/test_sock.c +@@ -35,12 +35,15 @@ struct sock_test { + /* Endpoint to bind() to */ + const char *ip; + unsigned short port; ++ unsigned short port_retry; + /* Expected test result */ + enum { + LOAD_REJECT, + ATTACH_REJECT, + BIND_REJECT, + SUCCESS, ++ RETRY_SUCCESS, ++ RETRY_REJECT + } result; + }; + +@@ -252,6 +255,99 @@ static struct sock_test tests[] = { + .result = SUCCESS, + }, + { ++ .descr = "bind4 deny specific IP & port of TCP, and retry", ++ .insns = { ++ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), ++ ++ /* if (ip == expected && port == expected) */ ++ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, ++ offsetof(struct bpf_sock, src_ip4)), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ++ __bpf_constant_ntohl(0x7F000001), 4), ++ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, ++ offsetof(struct bpf_sock, src_port)), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2), ++ ++ /* return DENY; */ ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_JMP_A(1), ++ ++ /* else return ALLOW; */ ++ BPF_MOV64_IMM(BPF_REG_0, 1), ++ BPF_EXIT_INSN(), ++ }, ++ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .domain = AF_INET, ++ .type = SOCK_STREAM, ++ .ip = "127.0.0.1", ++ .port = 4098, ++ .port_retry = 5000, ++ .result = RETRY_SUCCESS, ++ }, ++ { ++ .descr = "bind4 deny specific IP & port of UDP, and retry", ++ .insns = { ++ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), ++ ++ /* if (ip == expected && port == expected) */ ++ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, ++ offsetof(struct bpf_sock, src_ip4)), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ++ __bpf_constant_ntohl(0x7F000001), 4), ++ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, ++ offsetof(struct bpf_sock, src_port)), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2), ++ ++ /* return DENY; */ ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_JMP_A(1), ++ ++ /* else return ALLOW; */ ++ BPF_MOV64_IMM(BPF_REG_0, 1), ++ BPF_EXIT_INSN(), ++ }, ++ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .domain = AF_INET, ++ .type = SOCK_DGRAM, ++ .ip = "127.0.0.1", ++ .port = 4098, ++ .port_retry = 5000, ++ .result = RETRY_SUCCESS, ++ }, ++ { ++ .descr = "bind6 deny specific IP & port, and retry", ++ .insns = { ++ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), ++ ++ /* if (ip == expected && port == expected) */ ++ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, ++ offsetof(struct bpf_sock, src_ip6[3])), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ++ __bpf_constant_ntohl(0x00000001), 4), ++ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, ++ offsetof(struct bpf_sock, src_port)), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2), ++ ++ /* return DENY; */ ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_JMP_A(1), ++ ++ /* else return ALLOW; */ ++ BPF_MOV64_IMM(BPF_REG_0, 1), ++ BPF_EXIT_INSN(), ++ }, ++ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, ++ .attach_type = BPF_CGROUP_INET6_POST_BIND, ++ .domain = AF_INET6, ++ .type = SOCK_STREAM, ++ .ip = "::1", ++ .port = 8193, ++ .port_retry = 9000, ++ .result = RETRY_SUCCESS, ++ }, ++ { + .descr = "bind4 allow all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, +@@ -315,14 
+411,15 @@ static int attach_sock_prog(int cgfd, in + return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE); + } + +-static int bind_sock(int domain, int type, const char *ip, unsigned short port) ++static int bind_sock(int domain, int type, const char *ip, ++ unsigned short port, unsigned short port_retry) + { + struct sockaddr_storage addr; + struct sockaddr_in6 *addr6; + struct sockaddr_in *addr4; + int sockfd = -1; + socklen_t len; +- int err = 0; ++ int res = SUCCESS; + + sockfd = socket(domain, type, 0); + if (sockfd < 0) +@@ -348,21 +445,44 @@ static int bind_sock(int domain, int typ + goto err; + } + +- if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) +- goto err; ++ if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) { ++ /* sys_bind() may fail for different reasons, errno has to be ++ * checked to confirm that BPF program rejected it. ++ */ ++ if (errno != EPERM) ++ goto err; ++ if (port_retry) ++ goto retry; ++ res = BIND_REJECT; ++ goto out; ++ } + + goto out; ++retry: ++ if (domain == AF_INET) ++ addr4->sin_port = htons(port_retry); ++ else ++ addr6->sin6_port = htons(port_retry); ++ if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) { ++ if (errno != EPERM) ++ goto err; ++ res = RETRY_REJECT; ++ } else { ++ res = RETRY_SUCCESS; ++ } ++ goto out; + err: +- err = -1; ++ res = -1; + out: + close(sockfd); +- return err; ++ return res; + } + + static int run_test_case(int cgfd, const struct sock_test *test) + { + int progfd = -1; + int err = 0; ++ int res; + + printf("Test case: %s .. ", test->descr); + progfd = load_sock_prog(test->insns, test->expected_attach_type); +@@ -380,21 +500,11 @@ static int run_test_case(int cgfd, const + goto err; + } + +- if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) { +- /* sys_bind() may fail for different reasons, errno has to be +- * checked to confirm that BPF program rejected it. +- */ +- if (test->result == BIND_REJECT && errno == EPERM) +- goto out; +- else +- goto err; +- } +- ++ res = bind_sock(test->domain, test->type, test->ip, test->port, ++ test->port_retry); ++ if (res > 0 && test->result == res) ++ goto out; + +- if (test->result != SUCCESS) +- goto err; +- +- goto out; + err: + err = -1; + out: diff --git a/patches.suse/bpf-selftests-Add-check-for-updating-XDP-bpf_link-wi.patch b/patches.suse/bpf-selftests-Add-check-for-updating-XDP-bpf_link-wi.patch new file mode 100644 index 0000000..9cffef2 --- /dev/null +++ b/patches.suse/bpf-selftests-Add-check-for-updating-XDP-bpf_link-wi.patch @@ -0,0 +1,52 @@ +From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= +Date: Fri, 7 Jan 2022 23:11:15 +0100 +Subject: bpf/selftests: Add check for updating XDP bpf_link with wrong program + type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 +Git-commit: 036a05f50bd777134b1955f400e8d24c0149fef4 +References: jsc#PED-1368 + +Add a check to the xdp_link selftest that the kernel rejects replacing an +XDP program with a different program type on link update. + +v2: +- Split this out into its own patch. 
+ +Signed-off-by: Toke Høiland-Jørgensen +Link: https://lore.kernel.org/r/20220107221115.326171-3-toke@redhat.com +Signed-off-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/xdp_link.c | 5 +++++ + tools/testing/selftests/bpf/progs/test_xdp_link.c | 6 ++++++ + 2 files changed, 11 insertions(+) + +--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c ++++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c +@@ -127,6 +127,11 @@ void serial_test_xdp_link(void) + ASSERT_EQ(link_info.prog_id, id1, "link_prog_id"); + ASSERT_EQ(link_info.xdp.ifindex, IFINDEX_LO, "link_ifindex"); + ++ /* updating program under active BPF link with different type fails */ ++ err = bpf_link__update_program(link, skel1->progs.tc_handler); ++ if (!ASSERT_ERR(err, "link_upd_invalid")) ++ goto cleanup; ++ + err = bpf_link__detach(link); + if (!ASSERT_OK(err, "link_detach")) + goto cleanup; +--- a/tools/testing/selftests/bpf/progs/test_xdp_link.c ++++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c +@@ -10,3 +10,9 @@ int xdp_handler(struct xdp_md *xdp) + { + return 0; + } ++ ++SEC("tc") ++int tc_handler(struct __sk_buff *skb) ++{ ++ return 0; ++} diff --git a/patches.suse/bpf-selftests-Add-ringbuf-memory-type-confusion-test.patch b/patches.suse/bpf-selftests-Add-ringbuf-memory-type-confusion-test.patch new file mode 100644 index 0000000..1745779 --- /dev/null +++ b/patches.suse/bpf-selftests-Add-ringbuf-memory-type-confusion-test.patch @@ -0,0 +1,150 @@ +From: Daniel Borkmann +Date: Wed, 12 Jan 2022 12:39:48 +0000 +Subject: bpf, selftests: Add ringbuf memory type confusion test +Patch-mainline: v5.17-rc1 +Git-commit: 37c8d4807d1b8b521b30310dce97f6695dc2c2c6 +References: jsc#PED-1368 + +Add two tests, one which asserts that ring buffer memory can be passed to +other helpers for populating its entry area, and another one where the verifier +rejects a different type of memory passed to bpf_ringbuf_submit(). 
+ +Signed-off-by: Daniel Borkmann +Acked-by: John Fastabend +Acked-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/d_path.c | 14 +++++ + tools/testing/selftests/bpf/progs/test_d_path_check_types.c | 32 +++++++++++ + tools/testing/selftests/bpf/verifier/ringbuf.c | 33 +++++++++++- + tools/testing/selftests/bpf/verifier/spill_fill.c | 2 + 4 files changed, 79 insertions(+), 2 deletions(-) + create mode 100644 tools/testing/selftests/bpf/progs/test_d_path_check_types.c + +--- a/tools/testing/selftests/bpf/prog_tests/d_path.c ++++ b/tools/testing/selftests/bpf/prog_tests/d_path.c +@@ -10,6 +10,7 @@ + + #include "test_d_path.skel.h" + #include "test_d_path_check_rdonly_mem.skel.h" ++#include "test_d_path_check_types.skel.h" + + static int duration; + +@@ -167,6 +168,16 @@ static void test_d_path_check_rdonly_mem + test_d_path_check_rdonly_mem__destroy(skel); + } + ++static void test_d_path_check_types(void) ++{ ++ struct test_d_path_check_types *skel; ++ ++ skel = test_d_path_check_types__open_and_load(); ++ ASSERT_ERR_PTR(skel, "unexpected_load_passing_wrong_type"); ++ ++ test_d_path_check_types__destroy(skel); ++} ++ + void test_d_path(void) + { + if (test__start_subtest("basic")) +@@ -174,4 +185,7 @@ void test_d_path(void) + + if (test__start_subtest("check_rdonly_mem")) + test_d_path_check_rdonly_mem(); ++ ++ if (test__start_subtest("check_alloc_mem")) ++ test_d_path_check_types(); + } +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c +@@ -0,0 +1,32 @@ ++// SPDX-License-Identifier: GPL-2.0 ++ ++#include "vmlinux.h" ++#include ++#include ++ ++extern const int bpf_prog_active __ksym; ++ ++struct { ++ __uint(type, BPF_MAP_TYPE_RINGBUF); ++ __uint(max_entries, 1 << 12); ++} ringbuf SEC(".maps"); ++ ++SEC("fentry/security_inode_getattr") ++int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat, ++ __u32 request_mask, unsigned int query_flags) ++{ ++ void *active; ++ u32 cpu; ++ ++ cpu = bpf_get_smp_processor_id(); ++ active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); ++ if (active) { ++ /* FAIL here! 'active' points to 'regular' memory. It ++ * cannot be submitted to ring buffer. 
++ */ ++ bpf_ringbuf_submit(active, 0); ++ } ++ return 0; ++} ++ ++char _license[] SEC("license") = "GPL"; +--- a/tools/testing/selftests/bpf/verifier/ringbuf.c ++++ b/tools/testing/selftests/bpf/verifier/ringbuf.c +@@ -28,7 +28,7 @@ + }, + .fixup_map_ringbuf = { 1 }, + .result = REJECT, +- .errstr = "dereference of modified mem ptr R1", ++ .errstr = "dereference of modified alloc_mem ptr R1", + }, + { + "ringbuf: invalid reservation offset 2", +@@ -62,3 +62,34 @@ + .result = REJECT, + .errstr = "R7 min value is outside of the allowed memory range", + }, ++{ ++ "ringbuf: check passing rb mem to helpers", ++ .insns = { ++ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), ++ /* reserve 8 byte ringbuf memory */ ++ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), ++ BPF_LD_MAP_FD(BPF_REG_1, 0), ++ BPF_MOV64_IMM(BPF_REG_2, 8), ++ BPF_MOV64_IMM(BPF_REG_3, 0), ++ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), ++ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), ++ /* check whether the reservation was successful */ ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), ++ BPF_EXIT_INSN(), ++ /* pass allocated ring buffer memory to fib lookup */ ++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), ++ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), ++ BPF_MOV64_IMM(BPF_REG_3, 8), ++ BPF_MOV64_IMM(BPF_REG_4, 0), ++ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_fib_lookup), ++ /* submit the ringbuf memory */ ++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), ++ BPF_MOV64_IMM(BPF_REG_2, 0), ++ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }, ++ .fixup_map_ringbuf = { 2 }, ++ .prog_type = BPF_PROG_TYPE_XDP, ++ .result = ACCEPT, ++}, +--- a/tools/testing/selftests/bpf/verifier/spill_fill.c ++++ b/tools/testing/selftests/bpf/verifier/spill_fill.c +@@ -84,7 +84,7 @@ + }, + .fixup_map_ringbuf = { 1 }, + .result = REJECT, +- .errstr = "R0 pointer arithmetic on mem_or_null prohibited", ++ .errstr = "R0 pointer arithmetic on alloc_mem_or_null prohibited", + }, + { + "check corrupted spill/fill", diff --git a/patches.suse/bpf-selftests-Add-verifier-test-for-mem_or_null-regi.patch b/patches.suse/bpf-selftests-Add-verifier-test-for-mem_or_null-regi.patch new file mode 100644 index 0000000..5ee22c4 --- /dev/null +++ b/patches.suse/bpf-selftests-Add-verifier-test-for-mem_or_null-regi.patch @@ -0,0 +1,60 @@ +From: Daniel Borkmann +Date: Wed, 5 Jan 2022 11:33:34 -0800 +Subject: bpf, selftests: Add verifier test for mem_or_null register with + offset. 
+Patch-mainline: v5.17-rc1 +Git-commit: ca796fe66f7fceff17679ee6cc5fe4b4023de44d +References: jsc#PED-1368 + +Add a new test case with mem_or_null typed register with off > 0 to ensure +it gets rejected by the verifier: + + # ./test_verifier 1011 + #1009/u check with invalid reg offset 0 OK + #1009/p check with invalid reg offset 0 OK + Summary: 2 PASSED, 0 SKIPPED, 0 FAILED + +Signed-off-by: Daniel Borkmann +Signed-off-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/verifier/spill_fill.c | 28 ++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +--- a/tools/testing/selftests/bpf/verifier/spill_fill.c ++++ b/tools/testing/selftests/bpf/verifier/spill_fill.c +@@ -59,6 +59,34 @@ + .result_unpriv = ACCEPT, + }, + { ++ "check with invalid reg offset 0", ++ .insns = { ++ /* reserve 8 byte ringbuf memory */ ++ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), ++ BPF_LD_MAP_FD(BPF_REG_1, 0), ++ BPF_MOV64_IMM(BPF_REG_2, 8), ++ BPF_MOV64_IMM(BPF_REG_3, 0), ++ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), ++ /* store a pointer to the reserved memory in R6 */ ++ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), ++ /* add invalid offset to memory or NULL */ ++ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), ++ /* check whether the reservation was successful */ ++ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), ++ /* should not be able to access *(R7) = 0 */ ++ BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0), ++ /* submit the reserved ringbuf memory */ ++ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), ++ BPF_MOV64_IMM(BPF_REG_2, 0), ++ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }, ++ .fixup_map_ringbuf = { 1 }, ++ .result = REJECT, ++ .errstr = "R0 pointer arithmetic on mem_or_null prohibited", ++}, ++{ + "check corrupted spill/fill", + .insns = { + /* spill R1(ctx) into stack */ diff --git a/patches.suse/bpf-selftests-Fix-namespace-mount-setup-in-tc_redire.patch b/patches.suse/bpf-selftests-Fix-namespace-mount-setup-in-tc_redire.patch new file mode 100644 index 0000000..f53e6f6 --- /dev/null +++ b/patches.suse/bpf-selftests-Fix-namespace-mount-setup-in-tc_redire.patch @@ -0,0 +1,58 @@ +From: Jiri Olsa +Date: Tue, 4 Jan 2022 13:10:30 +0100 +Subject: bpf/selftests: Fix namespace mount setup in tc_redirect +Patch-mainline: v5.17-rc1 +Git-commit: 5e22dd18626726028a93ff1350a8a71a00fd843d +References: jsc#PED-1368 + +The tc_redirect umounts /sys in the new namespace, which can be +mounted as shared and cause global umount. The lazy umount also +takes down mounted trees under /sys like debugfs, which won't be +available after sysfs mounts again and could cause fails in other +tests. + + # cat /proc/self/mountinfo | grep debugfs + 34 23 0:7 / /sys/kernel/debug rw,nosuid,nodev,noexec,relatime shared:14 - debugfs debugfs rw + # cat /proc/self/mountinfo | grep sysfs + 23 86 0:22 / /sys rw,nosuid,nodev,noexec,relatime shared:2 - sysfs sysfs rw + # mount | grep debugfs + debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime) + + # ./test_progs -t tc_redirect + #164 tc_redirect:OK + Summary: 1/4 PASSED, 0 SKIPPED, 0 FAILED + + # mount | grep debugfs + # cat /proc/self/mountinfo | grep debugfs + # cat /proc/self/mountinfo | grep sysfs + 25 86 0:22 / /sys rw,relatime shared:2 - sysfs sysfs rw + +Making the sysfs private under the new namespace so the umount won't +trigger the global sysfs umount. 
+ +Reported-by: Hangbin Liu +Signed-off-by: Jiri Olsa +Signed-off-by: Daniel Borkmann +Cc: Jussi Maki +Link: https://lore.kernel.org/bpf/20220104121030.138216-1-jolsa@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/tc_redirect.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c ++++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +@@ -105,6 +105,13 @@ static int setns_by_fd(int nsfd) + if (!ASSERT_OK(err, "unshare")) + return err; + ++ /* Make our /sys mount private, so the following umount won't ++ * trigger the global umount in case it's shared. ++ */ ++ err = mount("none", "/sys", NULL, MS_PRIVATE, NULL); ++ if (!ASSERT_OK(err, "remount private /sys")) ++ return err; ++ + err = umount2("/sys", MNT_DETACH); + if (!ASSERT_OK(err, "umount2 /sys")) + return err; diff --git a/patches.suse/bpf-selftests-Test-bpf_d_path-on-rdonly_mem.patch b/patches.suse/bpf-selftests-Test-bpf_d_path-on-rdonly_mem.patch new file mode 100644 index 0000000..088c0e8 --- /dev/null +++ b/patches.suse/bpf-selftests-Test-bpf_d_path-on-rdonly_mem.patch @@ -0,0 +1,96 @@ +From: Hao Luo +Date: Thu, 6 Jan 2022 12:55:25 -0800 +Subject: bpf/selftests: Test bpf_d_path on rdonly_mem. +Patch-mainline: v5.17-rc1 +Git-commit: 44bab87d8ca6f0544a9f8fc97bdf33aa5b3c899e +References: jsc#PED-1368 + +The second parameter of bpf_d_path() can only accept writable +memories. Rdonly_mem obtained from bpf_per_cpu_ptr() can not +be passed into bpf_d_path for modification. This patch adds +a selftest to verify this behavior. + +Signed-off-by: Hao Luo +Signed-off-by: Andrii Nakryiko +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20220106205525.2116218-1-haoluo@google.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/d_path.c | 22 +++++++ + tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c | 28 ++++++++++ + 2 files changed, 49 insertions(+), 1 deletion(-) + create mode 100644 tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c + +--- a/tools/testing/selftests/bpf/prog_tests/d_path.c ++++ b/tools/testing/selftests/bpf/prog_tests/d_path.c +@@ -9,6 +9,7 @@ + #define MAX_FILES 7 + + #include "test_d_path.skel.h" ++#include "test_d_path_check_rdonly_mem.skel.h" + + static int duration; + +@@ -99,7 +100,7 @@ out_close: + return ret; + } + +-void test_d_path(void) ++static void test_d_path_basic(void) + { + struct test_d_path__bss *bss; + struct test_d_path *skel; +@@ -155,3 +156,22 @@ void test_d_path(void) + cleanup: + test_d_path__destroy(skel); + } ++ ++static void test_d_path_check_rdonly_mem(void) ++{ ++ struct test_d_path_check_rdonly_mem *skel; ++ ++ skel = test_d_path_check_rdonly_mem__open_and_load(); ++ ASSERT_ERR_PTR(skel, "unexpected_load_overwriting_rdonly_mem"); ++ ++ test_d_path_check_rdonly_mem__destroy(skel); ++} ++ ++void test_d_path(void) ++{ ++ if (test__start_subtest("basic")) ++ test_d_path_basic(); ++ ++ if (test__start_subtest("check_rdonly_mem")) ++ test_d_path_check_rdonly_mem(); ++} +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c +@@ -0,0 +1,28 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2022 Google */ ++ ++#include "vmlinux.h" ++#include ++#include ++ ++extern const int bpf_prog_active __ksym; ++ ++SEC("fentry/security_inode_getattr") ++int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat, ++ __u32 request_mask, unsigned int query_flags) ++{ ++ void *active; ++ __u32 cpu; ++ ++ cpu = 
bpf_get_smp_processor_id(); ++ active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); ++ if (active) { ++ /* FAIL here! 'active' points to readonly memory. bpf helpers ++ * that update its arguments can not write into it. ++ */ ++ bpf_d_path(path, active, sizeof(int)); ++ } ++ return 0; ++} ++ ++char _license[] SEC("license") = "GPL"; diff --git a/patches.suse/bpf-selftests-Update-local-storage-selftest-for-slee.patch b/patches.suse/bpf-selftests-Update-local-storage-selftest-for-slee.patch new file mode 100644 index 0000000..4fb13e4 --- /dev/null +++ b/patches.suse/bpf-selftests-Update-local-storage-selftest-for-slee.patch @@ -0,0 +1,186 @@ +From: KP Singh +Date: Fri, 24 Dec 2021 15:29:16 +0000 +Subject: bpf/selftests: Update local storage selftest for sleepable programs +Patch-mainline: v5.17-rc1 +Git-commit: 0ae6eff2978ee118ce2b536090af0682db13bb83 +References: jsc#PED-1368 + +Remove the spin lock logic and update the selftests to use sleepable +programs to use a mix of sleepable and non-sleepable programs. It's more +useful to test the sleepable programs since the tests don't really need +spinlocks. + +Signed-off-by: KP Singh +Signed-off-by: Alexei Starovoitov +Acked-by: Martin KaFai Lau +Link: https://lore.kernel.org/bpf/20211224152916.1550677-3-kpsingh@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/test_local_storage.c | 20 +++------- + tools/testing/selftests/bpf/progs/local_storage.c | 24 ++---------- + 2 files changed, 11 insertions(+), 33 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c ++++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c +@@ -28,10 +28,6 @@ static unsigned int duration; + struct storage { + void *inode; + unsigned int value; +- /* Lock ensures that spin locked versions of local stoage operations +- * also work, most operations in this tests are still single threaded +- */ +- struct bpf_spin_lock lock; + }; + + /* Fork and exec the provided rm binary and return the exit code of the +@@ -66,27 +62,24 @@ static int run_self_unlink(int *monitore + + static bool check_syscall_operations(int map_fd, int obj_fd) + { +- struct storage val = { .value = TEST_STORAGE_VALUE, .lock = { 0 } }, +- lookup_val = { .value = 0, .lock = { 0 } }; ++ struct storage val = { .value = TEST_STORAGE_VALUE }, ++ lookup_val = { .value = 0 }; + int err; + + /* Looking up an existing element should fail initially */ +- err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, +- BPF_F_LOCK); ++ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); + if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", + "err:%d errno:%d\n", err, errno)) + return false; + + /* Create a new element */ +- err = bpf_map_update_elem(map_fd, &obj_fd, &val, +- BPF_NOEXIST | BPF_F_LOCK); ++ err = bpf_map_update_elem(map_fd, &obj_fd, &val, BPF_NOEXIST); + if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err, + errno)) + return false; + + /* Lookup the newly created element */ +- err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, +- BPF_F_LOCK); ++ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); + if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err, + errno)) + return false; +@@ -102,8 +95,7 @@ static bool check_syscall_operations(int + return false; + + /* The lookup should fail, now that the element has been deleted */ +- err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, +- BPF_F_LOCK); ++ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, 
&lookup_val, 0); + if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", + "err:%d errno:%d\n", err, errno)) + return false; +--- a/tools/testing/selftests/bpf/progs/local_storage.c ++++ b/tools/testing/selftests/bpf/progs/local_storage.c +@@ -20,7 +20,6 @@ int sk_storage_result = -1; + struct local_storage { + struct inode *exec_inode; + __u32 value; +- struct bpf_spin_lock lock; + }; + + struct { +@@ -58,9 +57,7 @@ int BPF_PROG(unlink_hook, struct inode * + bpf_get_current_task_btf(), 0, 0); + if (storage) { + /* Don't let an executable delete itself */ +- bpf_spin_lock(&storage->lock); + is_self_unlink = storage->exec_inode == victim->d_inode; +- bpf_spin_unlock(&storage->lock); + if (is_self_unlink) + return -EPERM; + } +@@ -68,7 +65,7 @@ int BPF_PROG(unlink_hook, struct inode * + return 0; + } + +-SEC("lsm/inode_rename") ++SEC("lsm.s/inode_rename") + int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +@@ -89,10 +86,8 @@ int BPF_PROG(inode_rename, struct inode + if (!storage) + return 0; + +- bpf_spin_lock(&storage->lock); + if (storage->value != DUMMY_STORAGE_VALUE) + inode_storage_result = -1; +- bpf_spin_unlock(&storage->lock); + + err = bpf_inode_storage_delete(&inode_storage_map, old_dentry->d_inode); + if (!err) +@@ -101,7 +96,7 @@ int BPF_PROG(inode_rename, struct inode + return 0; + } + +-SEC("lsm/socket_bind") ++SEC("lsm.s/socket_bind") + int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, + int addrlen) + { +@@ -117,10 +112,8 @@ int BPF_PROG(socket_bind, struct socket + if (!storage) + return 0; + +- bpf_spin_lock(&storage->lock); + if (storage->value != DUMMY_STORAGE_VALUE) + sk_storage_result = -1; +- bpf_spin_unlock(&storage->lock); + + err = bpf_sk_storage_delete(&sk_storage_map, sock->sk); + if (!err) +@@ -129,7 +122,7 @@ int BPF_PROG(socket_bind, struct socket + return 0; + } + +-SEC("lsm/socket_post_create") ++SEC("lsm.s/socket_post_create") + int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, + int protocol, int kern) + { +@@ -144,9 +137,7 @@ int BPF_PROG(socket_post_create, struct + if (!storage) + return 0; + +- bpf_spin_lock(&storage->lock); + storage->value = DUMMY_STORAGE_VALUE; +- bpf_spin_unlock(&storage->lock); + + return 0; + } +@@ -154,7 +145,7 @@ int BPF_PROG(socket_post_create, struct + /* This uses the local storage to remember the inode of the binary that a + * process was originally executing. 
+ */ +-SEC("lsm/bprm_committed_creds") ++SEC("lsm.s/bprm_committed_creds") + void BPF_PROG(exec, struct linux_binprm *bprm) + { + __u32 pid = bpf_get_current_pid_tgid() >> 32; +@@ -166,18 +157,13 @@ void BPF_PROG(exec, struct linux_binprm + storage = bpf_task_storage_get(&task_storage_map, + bpf_get_current_task_btf(), 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); +- if (storage) { +- bpf_spin_lock(&storage->lock); ++ if (storage) + storage->exec_inode = bprm->file->f_inode; +- bpf_spin_unlock(&storage->lock); +- } + + storage = bpf_inode_storage_get(&inode_storage_map, bprm->file->f_inode, + 0, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!storage) + return; + +- bpf_spin_lock(&storage->lock); + storage->value = DUMMY_STORAGE_VALUE; +- bpf_spin_unlock(&storage->lock); + } diff --git a/patches.suse/bpf-selftests-Use-C99-initializers-in-test_sock.c.patch b/patches.suse/bpf-selftests-Use-C99-initializers-in-test_sock.c.patch new file mode 100644 index 0000000..20a35a4 --- /dev/null +++ b/patches.suse/bpf-selftests-Use-C99-initializers-in-test_sock.c.patch @@ -0,0 +1,358 @@ +From: Menglong Dong +Date: Thu, 6 Jan 2022 21:20:21 +0800 +Subject: bpf: selftests: Use C99 initializers in test_sock.c +Patch-mainline: v5.17-rc1 +Git-commit: 6fd92c7f0c3846340fee20f62dacb17d0a15c0d3 +References: jsc#PED-1368 + +Use C99 initializers for the initialization of 'tests' in test_sock.c. + +Signed-off-by: Menglong Dong +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220106132022.3470772-3-imagedong@tencent.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/test_sock.c | 220 +++++++++++++------------------- + 1 file changed, 92 insertions(+), 128 deletions(-) + +--- a/tools/testing/selftests/bpf/test_sock.c ++++ b/tools/testing/selftests/bpf/test_sock.c +@@ -46,7 +46,7 @@ struct sock_test { + + static struct sock_test tests[] = { + { +- "bind4 load with invalid access: src_ip6", ++ .descr = "bind4 load with invalid access: src_ip6", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, +@@ -54,16 +54,12 @@ static struct sock_test tests[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, +- BPF_CGROUP_INET4_POST_BIND, +- BPF_CGROUP_INET4_POST_BIND, +- 0, +- 0, +- NULL, +- 0, +- LOAD_REJECT, ++ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .result = LOAD_REJECT, + }, + { +- "bind4 load with invalid access: mark", ++ .descr = "bind4 load with invalid access: mark", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, +@@ -71,16 +67,12 @@ static struct sock_test tests[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, +- BPF_CGROUP_INET4_POST_BIND, +- BPF_CGROUP_INET4_POST_BIND, +- 0, +- 0, +- NULL, +- 0, +- LOAD_REJECT, ++ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .result = LOAD_REJECT, + }, + { +- "bind6 load with invalid access: src_ip4", ++ .descr = "bind6 load with invalid access: src_ip4", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, +@@ -88,16 +80,12 @@ static struct sock_test tests[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, +- BPF_CGROUP_INET6_POST_BIND, +- BPF_CGROUP_INET6_POST_BIND, +- 0, +- 0, +- NULL, +- 0, +- LOAD_REJECT, ++ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, ++ .attach_type = BPF_CGROUP_INET6_POST_BIND, ++ .result = LOAD_REJECT, + }, + { +- "sock_create load with invalid access: 
src_port", ++ .descr = "sock_create load with invalid access: src_port", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, +@@ -105,128 +93,106 @@ static struct sock_test tests[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, +- BPF_CGROUP_INET_SOCK_CREATE, +- BPF_CGROUP_INET_SOCK_CREATE, +- 0, +- 0, +- NULL, +- 0, +- LOAD_REJECT, ++ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, ++ .attach_type = BPF_CGROUP_INET_SOCK_CREATE, ++ .result = LOAD_REJECT, + }, + { +- "sock_create load w/o expected_attach_type (compat mode)", ++ .descr = "sock_create load w/o expected_attach_type (compat mode)", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, +- 0, +- BPF_CGROUP_INET_SOCK_CREATE, +- AF_INET, +- SOCK_STREAM, +- "127.0.0.1", +- 8097, +- SUCCESS, ++ .expected_attach_type = 0, ++ .attach_type = BPF_CGROUP_INET_SOCK_CREATE, ++ .domain = AF_INET, ++ .type = SOCK_STREAM, ++ .ip = "127.0.0.1", ++ .port = 8097, ++ .result = SUCCESS, + }, + { +- "sock_create load w/ expected_attach_type", ++ .descr = "sock_create load w/ expected_attach_type", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, +- BPF_CGROUP_INET_SOCK_CREATE, +- BPF_CGROUP_INET_SOCK_CREATE, +- AF_INET, +- SOCK_STREAM, +- "127.0.0.1", +- 8097, +- SUCCESS, ++ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, ++ .attach_type = BPF_CGROUP_INET_SOCK_CREATE, ++ .domain = AF_INET, ++ .type = SOCK_STREAM, ++ .ip = "127.0.0.1", ++ .port = 8097, ++ .result = SUCCESS, + }, + { +- "attach type mismatch bind4 vs bind6", ++ .descr = "attach type mismatch bind4 vs bind6", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, +- BPF_CGROUP_INET4_POST_BIND, +- BPF_CGROUP_INET6_POST_BIND, +- 0, +- 0, +- NULL, +- 0, +- ATTACH_REJECT, ++ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .attach_type = BPF_CGROUP_INET6_POST_BIND, ++ .result = ATTACH_REJECT, + }, + { +- "attach type mismatch bind6 vs bind4", ++ .descr = "attach type mismatch bind6 vs bind4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, +- BPF_CGROUP_INET6_POST_BIND, +- BPF_CGROUP_INET4_POST_BIND, +- 0, +- 0, +- NULL, +- 0, +- ATTACH_REJECT, ++ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, ++ .attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .result = ATTACH_REJECT, + }, + { +- "attach type mismatch default vs bind4", ++ .descr = "attach type mismatch default vs bind4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, +- 0, +- BPF_CGROUP_INET4_POST_BIND, +- 0, +- 0, +- NULL, +- 0, +- ATTACH_REJECT, ++ .expected_attach_type = 0, ++ .attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .result = ATTACH_REJECT, + }, + { +- "attach type mismatch bind6 vs sock_create", ++ .descr = "attach type mismatch bind6 vs sock_create", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, +- BPF_CGROUP_INET6_POST_BIND, +- BPF_CGROUP_INET_SOCK_CREATE, +- 0, +- 0, +- NULL, +- 0, +- ATTACH_REJECT, ++ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, ++ .attach_type = BPF_CGROUP_INET_SOCK_CREATE, ++ .result = ATTACH_REJECT, + }, + { +- "bind4 reject all", ++ .descr = "bind4 reject all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, +- BPF_CGROUP_INET4_POST_BIND, +- BPF_CGROUP_INET4_POST_BIND, +- AF_INET, +- SOCK_STREAM, +- "0.0.0.0", +- 0, +- BIND_REJECT, ++ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .domain = AF_INET, ++ .type = SOCK_STREAM, ++ .ip = "0.0.0.0", ++ 
.result = BIND_REJECT, + }, + { +- "bind6 reject all", ++ .descr = "bind6 reject all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, +- BPF_CGROUP_INET6_POST_BIND, +- BPF_CGROUP_INET6_POST_BIND, +- AF_INET6, +- SOCK_STREAM, +- "::", +- 0, +- BIND_REJECT, ++ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, ++ .attach_type = BPF_CGROUP_INET6_POST_BIND, ++ .domain = AF_INET6, ++ .type = SOCK_STREAM, ++ .ip = "::", ++ .result = BIND_REJECT, + }, + { +- "bind6 deny specific IP & port", ++ .descr = "bind6 deny specific IP & port", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + +@@ -247,16 +213,16 @@ static struct sock_test tests[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, +- BPF_CGROUP_INET6_POST_BIND, +- BPF_CGROUP_INET6_POST_BIND, +- AF_INET6, +- SOCK_STREAM, +- "::1", +- 8193, +- BIND_REJECT, ++ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, ++ .attach_type = BPF_CGROUP_INET6_POST_BIND, ++ .domain = AF_INET6, ++ .type = SOCK_STREAM, ++ .ip = "::1", ++ .port = 8193, ++ .result = BIND_REJECT, + }, + { +- "bind4 allow specific IP & port", ++ .descr = "bind4 allow specific IP & port", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + +@@ -277,41 +243,39 @@ static struct sock_test tests[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, +- BPF_CGROUP_INET4_POST_BIND, +- BPF_CGROUP_INET4_POST_BIND, +- AF_INET, +- SOCK_STREAM, +- "127.0.0.1", +- 4098, +- SUCCESS, ++ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .domain = AF_INET, ++ .type = SOCK_STREAM, ++ .ip = "127.0.0.1", ++ .port = 4098, ++ .result = SUCCESS, + }, + { +- "bind4 allow all", ++ .descr = "bind4 allow all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, +- BPF_CGROUP_INET4_POST_BIND, +- BPF_CGROUP_INET4_POST_BIND, +- AF_INET, +- SOCK_STREAM, +- "0.0.0.0", +- 0, +- SUCCESS, ++ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .attach_type = BPF_CGROUP_INET4_POST_BIND, ++ .domain = AF_INET, ++ .type = SOCK_STREAM, ++ .ip = "0.0.0.0", ++ .result = SUCCESS, + }, + { +- "bind6 allow all", ++ .descr = "bind6 allow all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, +- BPF_CGROUP_INET6_POST_BIND, +- BPF_CGROUP_INET6_POST_BIND, +- AF_INET6, +- SOCK_STREAM, +- "::", +- 0, +- SUCCESS, ++ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, ++ .attach_type = BPF_CGROUP_INET6_POST_BIND, ++ .domain = AF_INET6, ++ .type = SOCK_STREAM, ++ .ip = "::", ++ .result = SUCCESS, + }, + }; + diff --git a/patches.suse/bpf-selftests-convert-xdp_link-test-to-ASSERT_-macro.patch b/patches.suse/bpf-selftests-convert-xdp_link-test-to-ASSERT_-macro.patch new file mode 100644 index 0000000..190e13c --- /dev/null +++ b/patches.suse/bpf-selftests-convert-xdp_link-test-to-ASSERT_-macro.patch @@ -0,0 +1,172 @@ +From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= +Date: Fri, 7 Jan 2022 23:11:14 +0100 +Subject: bpf/selftests: convert xdp_link test to ASSERT_* macros +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 +Git-commit: 4b27480dcaa71e7ee9f56907e419c6a1511fd2b2 +References: jsc#PED-1368 + +Convert the selftest to use the preferred ASSERT_* macros instead of the +deprecated CHECK(). + +v2: +- Don't add if statements around checks if they weren't there before. 
+ +Signed-off-by: Toke Høiland-Jørgensen +Link: https://lore.kernel.org/r/20220107221115.326171-2-toke@redhat.com +Signed-off-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/xdp_link.c | 56 +++++++++------------- + 1 file changed, 25 insertions(+), 31 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c ++++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c +@@ -8,46 +8,47 @@ + + void serial_test_xdp_link(void) + { +- __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err; + DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1); + struct test_xdp_link *skel1 = NULL, *skel2 = NULL; ++ __u32 id1, id2, id0 = 0, prog_fd1, prog_fd2; + struct bpf_link_info link_info; + struct bpf_prog_info prog_info; + struct bpf_link *link; ++ int err; + __u32 link_info_len = sizeof(link_info); + __u32 prog_info_len = sizeof(prog_info); + + skel1 = test_xdp_link__open_and_load(); +- if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n")) ++ if (!ASSERT_OK_PTR(skel1, "skel_load")) + goto cleanup; + prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler); + + skel2 = test_xdp_link__open_and_load(); +- if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n")) ++ if (!ASSERT_OK_PTR(skel2, "skel_load")) + goto cleanup; + prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler); + + memset(&prog_info, 0, sizeof(prog_info)); + err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len); +- if (CHECK(err, "fd_info1", "failed %d\n", -errno)) ++ if (!ASSERT_OK(err, "fd_info1")) + goto cleanup; + id1 = prog_info.id; + + memset(&prog_info, 0, sizeof(prog_info)); + err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len); +- if (CHECK(err, "fd_info2", "failed %d\n", -errno)) ++ if (!ASSERT_OK(err, "fd_info2")) + goto cleanup; + id2 = prog_info.id; + + /* set initial prog attachment */ + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts); +- if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err)) ++ if (!ASSERT_OK(err, "fd_attach")) + goto cleanup; + + /* validate prog ID */ + err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); +- CHECK(err || id0 != id1, "id1_check", +- "loaded prog id %u != id1 %u, err %d", id0, id1, err); ++ if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val")) ++ goto cleanup; + + /* BPF link is not allowed to replace prog attachment */ + link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO); +@@ -62,7 +63,7 @@ void serial_test_xdp_link(void) + /* detach BPF program */ + opts.old_fd = prog_fd1; + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts); +- if (CHECK(err, "prog_detach", "failed %d\n", err)) ++ if (!ASSERT_OK(err, "prog_detach")) + goto cleanup; + + /* now BPF link should attach successfully */ +@@ -73,24 +74,23 @@ void serial_test_xdp_link(void) + + /* validate prog ID */ + err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); +- if (CHECK(err || id0 != id1, "id1_check", +- "loaded prog id %u != id1 %u, err %d", id0, id1, err)) ++ if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val")) + goto cleanup; + + /* BPF prog attach is not allowed to replace BPF link */ + opts.old_fd = prog_fd1; + err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts); +- if (CHECK(!err, "prog_attach_fail", "unexpected success\n")) ++ if (!ASSERT_ERR(err, "prog_attach_fail")) + goto cleanup; + + /* Can't force-update when BPF link is active */ + err = 
bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0); +- if (CHECK(!err, "prog_update_fail", "unexpected success\n")) ++ if (!ASSERT_ERR(err, "prog_update_fail")) + goto cleanup; + + /* Can't force-detach when BPF link is active */ + err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); +- if (CHECK(!err, "prog_detach_fail", "unexpected success\n")) ++ if (!ASSERT_ERR(err, "prog_detach_fail")) + goto cleanup; + + /* BPF link is not allowed to replace another BPF link */ +@@ -110,40 +110,34 @@ void serial_test_xdp_link(void) + skel2->links.xdp_handler = link; + + err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); +- if (CHECK(err || id0 != id2, "id2_check", +- "loaded prog id %u != id2 %u, err %d", id0, id1, err)) ++ if (!ASSERT_OK(err, "id2_check_err") || !ASSERT_EQ(id0, id2, "id2_check_val")) + goto cleanup; + + /* updating program under active BPF link works as expected */ + err = bpf_link__update_program(link, skel1->progs.xdp_handler); +- if (CHECK(err, "link_upd", "failed: %d\n", err)) ++ if (!ASSERT_OK(err, "link_upd")) + goto cleanup; + + memset(&link_info, 0, sizeof(link_info)); + err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); +- if (CHECK(err, "link_info", "failed: %d\n", err)) ++ if (!ASSERT_OK(err, "link_info")) + goto cleanup; + +- CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type", +- "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP); +- CHECK(link_info.prog_id != id1, "link_prog_id", +- "got %u != exp %u\n", link_info.prog_id, id1); +- CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex", +- "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO); ++ ASSERT_EQ(link_info.type, BPF_LINK_TYPE_XDP, "link_type"); ++ ASSERT_EQ(link_info.prog_id, id1, "link_prog_id"); ++ ASSERT_EQ(link_info.xdp.ifindex, IFINDEX_LO, "link_ifindex"); + + err = bpf_link__detach(link); +- if (CHECK(err, "link_detach", "failed %d\n", err)) ++ if (!ASSERT_OK(err, "link_detach")) + goto cleanup; + + memset(&link_info, 0, sizeof(link_info)); + err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); +- if (CHECK(err, "link_info", "failed: %d\n", err)) +- goto cleanup; +- CHECK(link_info.prog_id != id1, "link_prog_id", +- "got %u != exp %u\n", link_info.prog_id, id1); ++ ++ ASSERT_OK(err, "link_info"); ++ ASSERT_EQ(link_info.prog_id, id1, "link_prog_id"); + /* ifindex should be zeroed out */ +- CHECK(link_info.xdp.ifindex != 0, "link_ifindex", +- "got %u != exp %u\n", link_info.xdp.ifindex, 0); ++ ASSERT_EQ(link_info.xdp.ifindex, 0, "link_ifindex"); + + cleanup: + test_xdp_link__destroy(skel1); diff --git a/patches.suse/bpf-sockmap-Do-not-ignore-orig_len-parameter.patch b/patches.suse/bpf-sockmap-Do-not-ignore-orig_len-parameter.patch new file mode 100644 index 0000000..e2e15de --- /dev/null +++ b/patches.suse/bpf-sockmap-Do-not-ignore-orig_len-parameter.patch @@ -0,0 +1,41 @@ +From: Eric Dumazet +Date: Wed, 2 Mar 2022 08:17:22 -0800 +Subject: bpf, sockmap: Do not ignore orig_len parameter +Patch-mainline: v5.17-rc7 +Git-commit: 60ce37b03917e593d8e5d8bcc7ec820773daf81d +References: jsc#PED-1368 + +Currently, sk_psock_verdict_recv() returns skb->len. + +This is problematic because tcp_read_sock() might have +passed orig_len < skb->len, due to the presence of TCP urgent data. + +This causes an infinite loop from tcp_read_sock(). + +A followup patch will make tcp_read_sock() more robust vs bad actors.
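+
+A sketch of the actor contract this restores (simplified; the real
+function lives in net/core/skmsg.c): a recv actor invoked from
+tcp_read_sock() must not report more data consumed than the orig_len it
+was handed,
+
+	static int sk_psock_verdict_recv(read_descriptor_t *desc,
+					 struct sk_buff *skb,
+					 unsigned int offset, size_t orig_len)
+	{
+		int len = orig_len;	/* not skb->len */
+
+		/* verdict handling elided */
+		return len;
+	}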
+ +Fixes: ef5659280eb1 ("bpf, sockmap: Allow skipping sk_skb parser program") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Acked-by: John Fastabend +Acked-by: Jakub Sitnicki +Tested-by: Jakub Sitnicki +Acked-by: Daniel Borkmann +Link: https://lore.kernel.org/r/20220302161723.3910001-1-eric.dumazet@gmail.com +Signed-off-by: Jakub Kicinski +Acked-by: Shung-Hsi Yu +--- + net/core/skmsg.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/skmsg.c ++++ b/net/core/skmsg.c +@@ -1153,7 +1153,7 @@ static int sk_psock_verdict_recv(read_de + struct sk_psock *psock; + struct bpf_prog *prog; + int ret = __SK_DROP; +- int len = skb->len; ++ int len = orig_len; + + /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */ + skb = skb_clone(skb, GFP_ATOMIC); diff --git a/patches.suse/bpf-sockmap-Fix-double-bpf_prog_put-on-error-case-in.patch b/patches.suse/bpf-sockmap-Fix-double-bpf_prog_put-on-error-case-in.patch new file mode 100644 index 0000000..73ed73c --- /dev/null +++ b/patches.suse/bpf-sockmap-Fix-double-bpf_prog_put-on-error-case-in.patch @@ -0,0 +1,113 @@ +From: John Fastabend +Date: Tue, 4 Jan 2022 13:46:45 -0800 +Subject: bpf, sockmap: Fix double bpf_prog_put on error case in map_link +Patch-mainline: v5.17-rc1 +Git-commit: 218d747a4142f281a256687bb513a135c905867b +References: jsc#PED-1368 + +sock_map_link() is called to update a sockmap entry with a sk. But, if the +sock_map_init_proto() call fails then we return an error to the map_update +op against the sockmap. In the error path though we need to clean up the psock +and dec the refcnt on any programs associated with the map, because we +refcnt them early in the update process to ensure they are pinned for the +psock. (This avoids a race where a user deletes programs while also updating +the map with new socks.) + +In current code we do the prog refcnt dec explicitly by calling +bpf_prog_put() when the program was found in the map. But, after commit +'38207a5e81230' in this error path we've already done the prog to psock +assignment so the programs have a reference from the psock as well. This +then causes the psock tear down logic, invoked by sk_psock_put() in the +error path, to similarly call bpf_prog_put on the programs there. + +To be explicit this logic does the prog->psock assignment: + + if (msg_*) + psock_set_prog(...) + +Then the error path under the out_progs label does a similar check and +dec with: + + if (msg_*) + bpf_prog_put(...) + +And the teardown logic sk_psock_put() does ... + + psock_set_prog(msg_*, NULL) + +... triggering another bpf_prog_put(...). Then KASAN gives us this splat, +found by syzbot because we've created an imbalance between bpf_prog_inc and +bpf_prog_put calling put twice on the program. + + BUG: KASAN: vmalloc-out-of-bounds in __bpf_prog_put kernel/bpf/syscall.c:1812 [inline] + BUG: KASAN: vmalloc-out-of-bounds in __bpf_prog_put kernel/bpf/syscall.c:1812 [inline] kernel/bpf/syscall.c:1829 + BUG: KASAN: vmalloc-out-of-bounds in bpf_prog_put+0x8c/0x4f0 kernel/bpf/syscall.c:1829 kernel/bpf/syscall.c:1829 + Read of size 8 at addr ffffc90000e76038 by task syz-executor020/3641 + +To fix, clean up the error path so it doesn't try to do the bpf_prog_put +once progs are assigned; it then relies on the normal psock +tear down logic to do the complete cleanup. + +For completeness we also cover the case where sk_psock_init_strp() fails, +but this is not expected because it indicates an incorrect socket type +and should be caught earlier.
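+
+Schematically, the intended ownership model after the fix (hypothetical
+sketch, not taken from the diff below):
+
+	bpf_prog_inc(msg_parser);	/* ref pinned early during map update */
+	psock_set_prog(&psock->progs.msg_parser, msg_parser);
+	/* From here the psock owns that reference: error paths must only
+	 * call sk_psock_put(sk, psock), whose destructor drops the prog
+	 * references, so an extra bpf_prog_put() becomes a double put.
+	 */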
+ +Fixes: 38207a5e8123 ("bpf, sockmap: Attach map progs to psock early for feature probes") +Reported-by: syzbot+bb73e71cf4b8fd376a4f@syzkaller.appspotmail.com +Signed-off-by: John Fastabend +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20220104214645.290900-1-john.fastabend@gmail.com +Acked-by: Shung-Hsi Yu +--- + net/core/sock_map.c | 21 +++++++++++++-------- + 1 file changed, 13 insertions(+), 8 deletions(-) + +--- a/net/core/sock_map.c ++++ b/net/core/sock_map.c +@@ -292,15 +292,23 @@ static int sock_map_link(struct bpf_map + if (skb_verdict) + psock_set_prog(&psock->progs.skb_verdict, skb_verdict); + ++ /* msg_* and stream_* programs references tracked in psock after this ++ * point. Reference dec and cleanup will occur through psock destructor ++ */ + ret = sock_map_init_proto(sk, psock); +- if (ret < 0) +- goto out_drop; ++ if (ret < 0) { ++ sk_psock_put(sk, psock); ++ goto out; ++ } + + write_lock_bh(&sk->sk_callback_lock); + if (stream_parser && stream_verdict && !psock->saved_data_ready) { + ret = sk_psock_init_strp(sk, psock); +- if (ret) +- goto out_unlock_drop; ++ if (ret) { ++ write_unlock_bh(&sk->sk_callback_lock); ++ sk_psock_put(sk, psock); ++ goto out; ++ } + sk_psock_start_strp(sk, psock); + } else if (!stream_parser && stream_verdict && !psock->saved_data_ready) { + sk_psock_start_verdict(sk,psock); +@@ -309,10 +317,6 @@ static int sock_map_link(struct bpf_map + } + write_unlock_bh(&sk->sk_callback_lock); + return 0; +-out_unlock_drop: +- write_unlock_bh(&sk->sk_callback_lock); +-out_drop: +- sk_psock_put(sk, psock); + out_progs: + if (skb_verdict) + bpf_prog_put(skb_verdict); +@@ -325,6 +329,7 @@ out_put_stream_parser: + out_put_stream_verdict: + if (stream_verdict) + bpf_prog_put(stream_verdict); ++out: + return ret; + } + diff --git a/patches.suse/bpf-sockmap-Fix-return-codes-from-tcp_bpf_recvmsg_pa.patch b/patches.suse/bpf-sockmap-Fix-return-codes-from-tcp_bpf_recvmsg_pa.patch new file mode 100644 index 0000000..d819986 --- /dev/null +++ b/patches.suse/bpf-sockmap-Fix-return-codes-from-tcp_bpf_recvmsg_pa.patch @@ -0,0 +1,85 @@ +From: John Fastabend +Date: Tue, 4 Jan 2022 12:59:18 -0800 +Subject: bpf, sockmap: Fix return codes from tcp_bpf_recvmsg_parser() +Patch-mainline: v5.17-rc1 +Git-commit: 5b2c5540b8110eea0d67a78fb0ddb9654c58daeb +References: jsc#PED-1368 + +Applications can be confused slightly because we do not always return the +same error code as expected, e.g. what the TCP stack normally returns. For +example, on a sock err (sk->sk_err), instead of returning the sock_error we +return EAGAIN. This usually means the application will 'try again' +instead of aborting immediately. Another example, when a shutdown event +is received we should immediately abort instead of waiting for data when +the user provides a timeout. + +These tend to not be fatal, applications usually recover, but they introduce +bogus errors to the user or unexpected latency. Before +'c5d2177a72a16' we fell back to the TCP stack when no data was available +so we managed to catch many of the cases here, although with the extra +latency cost of calling tcp_msg_wait_data() first. + +To fix, let's duplicate the error handling in the TCP stack into tcp_bpf so +that we get the same error codes. + +These were found in our CI tests that run applications against sockmap +and do longer lived testing, at least compared to test_sockmap that +does short-lived ping/pong tests, and in some of our test clusters +we deploy.
+ +It's non-trivial to do these in shorter-form CI tests that would be +appropriate for BPF selftests, but we are looking into it so we can +ensure this keeps working going forward. As a preview, one idea is to +pull in the packetdrill testing which catches some of this. + +Fixes: c5d2177a72a16 ("bpf, sockmap: Fix race in ingress receive verdict with redirect to self") +Signed-off-by: John Fastabend +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20220104205918.286416-1-john.fastabend@gmail.com +Acked-by: Shung-Hsi Yu +--- + net/ipv4/tcp_bpf.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +--- a/net/ipv4/tcp_bpf.c ++++ b/net/ipv4/tcp_bpf.c +@@ -196,12 +196,39 @@ msg_bytes_ready: + long timeo; + int data; + ++ if (sock_flag(sk, SOCK_DONE)) ++ goto out; ++ ++ if (sk->sk_err) { ++ copied = sock_error(sk); ++ goto out; ++ } ++ ++ if (sk->sk_shutdown & RCV_SHUTDOWN) ++ goto out; ++ ++ if (sk->sk_state == TCP_CLOSE) { ++ copied = -ENOTCONN; ++ goto out; ++ } ++ + timeo = sock_rcvtimeo(sk, nonblock); ++ if (!timeo) { ++ copied = -EAGAIN; ++ goto out; ++ } ++ ++ if (signal_pending(current)) { ++ copied = sock_intr_errno(timeo); ++ goto out; ++ } ++ + data = tcp_msg_wait_data(sk, psock, timeo); + if (data && !sk_psock_queue_empty(psock)) + goto msg_bytes_ready; + copied = -EAGAIN; + } ++out: + release_sock(sk); + sk_psock_put(sk, psock); + return copied; diff --git a/patches.suse/bpf-x64-Replace-some-stack_size-usage-with-offset-va.patch b/patches.suse/bpf-x64-Replace-some-stack_size-usage-with-offset-va.patch new file mode 100644 index 0000000..5afddd0 --- /dev/null +++ b/patches.suse/bpf-x64-Replace-some-stack_size-usage-with-offset-va.patch @@ -0,0 +1,137 @@ +From: Jiri Olsa +Date: Wed, 8 Dec 2021 20:32:43 +0100 +Subject: bpf, x64: Replace some stack_size usage with offset variables +Patch-mainline: v5.17-rc1 +Git-commit: 5edf6a1983b90371da888ca86493937ec1c8a2b5 +References: jsc#PED-1368 + +As suggested by Andrii, adding variables for registers and ip +address offsets, which makes the code more clear, rather than +abusing the single stack_size variable for everything. + +Also describing the stack layout in the comment. + +There is no functional change. + +Suggested-by: Andrii Nakryiko +Signed-off-by: Jiri Olsa +Signed-off-by: Alexei Starovoitov +Acked-by: John Fastabend +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211208193245.172141-4-jolsa@kernel.org +Acked-by: Shung-Hsi Yu +--- + arch/x86/net/bpf_jit_comp.c | 42 ++++++++++++++++++++++++++++-------------- + 1 file changed, 28 insertions(+), 14 deletions(-) + +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -1933,7 +1933,7 @@ int arch_prepare_bpf_trampoline(struct b + void *orig_call) + { + int ret, i, nr_args = m->nr_args; +- int stack_size = nr_args * 8; ++ int regs_off, ip_off, stack_size = nr_args * 8; + struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY]; + struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT]; + struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; +@@ -1948,14 +1948,33 @@ int arch_prepare_bpf_trampoline(struct b + if (!is_valid_bpf_tramp_flags(flags)) + return -EINVAL; + ++ /* Generated trampoline stack layout: ++ * ++ * RBP + 8 [ return address ] ++ * RBP + 0 [ RBP ] ++ * ++ * RBP - 8 [ return value ] BPF_TRAMP_F_CALL_ORIG or ++ * BPF_TRAMP_F_RET_FENTRY_RET flags ++ * ++ * [ reg_argN ] always ++ * [ ...
] ++ * RBP - regs_off [ reg_arg1 ] program's ctx pointer ++ * ++ * RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag ++ */ ++ + /* room for return value of orig_call or fentry prog */ + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) + stack_size += 8; + ++ regs_off = stack_size; ++ + if (flags & BPF_TRAMP_F_IP_ARG) + stack_size += 8; /* room for IP address argument */ + ++ ip_off = stack_size; ++ + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* skip patched call instruction and point orig_call to actual + * body of the kernel function. +@@ -1973,19 +1992,14 @@ int arch_prepare_bpf_trampoline(struct b + /* Store IP address of the traced function: + * mov rax, QWORD PTR [rbp + 8] + * sub rax, X86_PATCH_SIZE +- * mov QWORD PTR [rbp - stack_size], rax ++ * mov QWORD PTR [rbp - ip_off], rax + */ + emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8); + EMIT4(0x48, 0x83, 0xe8, X86_PATCH_SIZE); +- emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -stack_size); +- +- /* Continue with stack_size for regs storage, stack will +- * be correctly restored with 'leave' instruction. +- */ +- stack_size -= 8; ++ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off); + } + +- save_regs(m, &prog, nr_args, stack_size); ++ save_regs(m, &prog, nr_args, regs_off); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + /* arg1: mov rdi, im */ +@@ -1997,7 +2011,7 @@ int arch_prepare_bpf_trampoline(struct b + } + + if (fentry->nr_progs) +- if (invoke_bpf(m, &prog, fentry, stack_size, ++ if (invoke_bpf(m, &prog, fentry, regs_off, + flags & BPF_TRAMP_F_RET_FENTRY_RET)) + return -EINVAL; + +@@ -2007,7 +2021,7 @@ int arch_prepare_bpf_trampoline(struct b + if (!branches) + return -ENOMEM; + +- if (invoke_bpf_mod_ret(m, &prog, fmod_ret, stack_size, ++ if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off, + branches)) { + ret = -EINVAL; + goto cleanup; +@@ -2015,7 +2029,7 @@ int arch_prepare_bpf_trampoline(struct b + } + + if (flags & BPF_TRAMP_F_CALL_ORIG) { +- restore_regs(m, &prog, nr_args, stack_size); ++ restore_regs(m, &prog, nr_args, regs_off); + + /* call original function */ + if (emit_call(&prog, orig_call, prog)) { +@@ -2045,13 +2059,13 @@ int arch_prepare_bpf_trampoline(struct b + } + + if (fexit->nr_progs) +- if (invoke_bpf(m, &prog, fexit, stack_size, false)) { ++ if (invoke_bpf(m, &prog, fexit, regs_off, false)) { + ret = -EINVAL; + goto cleanup; + } + + if (flags & BPF_TRAMP_F_RESTORE_REGS) +- restore_regs(m, &prog, nr_args, stack_size); ++ restore_regs(m, &prog, nr_args, regs_off); + + /* This needs to be done regardless. If there were fmod_ret programs, + * the return value is only updated on the stack and still needs to be diff --git a/patches.suse/bpftool-Add-SPDX-tags-to-RST-documentation-files.patch b/patches.suse/bpftool-Add-SPDX-tags-to-RST-documentation-files.patch new file mode 100644 index 0000000..0bc3f44 --- /dev/null +++ b/patches.suse/bpftool-Add-SPDX-tags-to-RST-documentation-files.patch @@ -0,0 +1,152 @@ +From: Quentin Monnet +Date: Mon, 15 Nov 2021 22:58:42 +0000 +Subject: bpftool: Add SPDX tags to RST documentation files +Patch-mainline: v5.17-rc1 +Git-commit: 4344842836e9b9a7b695dc84956cdecd83ac02e9 +References: jsc#PED-1368 + +Most files in the kernel repository have SPDX tags. The files that +don't have such a tag (or another license boilerplate) tend to fall +under the GPL-2.0 license. In the past, bpftool's Makefile (for example) +has been marked as GPL-2.0 for that reason, when in fact all of bpftool is +dual-licensed.
+ +To prevent a similar confusion from happening with the RST documentation +files for bpftool, let's explicitly mark all files as dual-licensed. + +Signed-off-by: Quentin Monnet +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211115225844.33943-2-quentin@isovalent.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/Documentation/Makefile | 2 +- + tools/bpf/bpftool/Documentation/bpftool-btf.rst | 2 ++ + tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 2 ++ + tools/bpf/bpftool/Documentation/bpftool-feature.rst | 2 ++ + tools/bpf/bpftool/Documentation/bpftool-gen.rst | 2 ++ + tools/bpf/bpftool/Documentation/bpftool-iter.rst | 2 ++ + tools/bpf/bpftool/Documentation/bpftool-link.rst | 2 ++ + tools/bpf/bpftool/Documentation/bpftool-map.rst | 2 ++ + tools/bpf/bpftool/Documentation/bpftool-net.rst | 2 ++ + tools/bpf/bpftool/Documentation/bpftool-perf.rst | 2 ++ + tools/bpf/bpftool/Documentation/bpftool-prog.rst | 2 ++ + tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst | 2 ++ + tools/bpf/bpftool/Documentation/bpftool.rst | 2 ++ + tools/bpf/bpftool/Documentation/common_options.rst | 2 ++ + 14 files changed, 27 insertions(+), 1 deletion(-) + +--- a/tools/bpf/bpftool/Documentation/Makefile ++++ b/tools/bpf/bpftool/Documentation/Makefile +@@ -24,7 +24,7 @@ man: man8 + man8: $(DOC_MAN8) + + RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) +-RST2MAN_OPTS += --verbose ++RST2MAN_OPTS += --verbose --strip-comments + + list_pages = $(sort $(basename $(filter-out $(1),$(MAN8_RST)))) + see_also = $(subst " ",, \ +--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst +@@ -1,3 +1,5 @@ ++.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++ + ================ + bpftool-btf + ================ +--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +@@ -1,3 +1,5 @@ ++.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++ + ================ + bpftool-cgroup + ================ +--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst +@@ -1,3 +1,5 @@ ++.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++ + =============== + bpftool-feature + =============== +--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst +@@ -1,3 +1,5 @@ ++.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++ + ================ + bpftool-gen + ================ +--- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst +@@ -1,3 +1,5 @@ ++.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++ + ============ + bpftool-iter + ============ +--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst +@@ -1,3 +1,5 @@ ++.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++ + ================ + bpftool-link + ================ +--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst +@@ -1,3 +1,5 @@ ++.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++ + ================ + bpftool-map + ================ +--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst +@@ -1,3 +1,5 @@ ++.. 
SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++ + ================ + bpftool-net + ================ +--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst +@@ -1,3 +1,5 @@ ++.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++ + ================ + bpftool-perf + ================ +--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst +@@ -1,3 +1,5 @@ ++.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++ + ================ + bpftool-prog + ================ +--- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst +@@ -1,3 +1,5 @@ ++.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++ + ================== + bpftool-struct_ops + ================== +--- a/tools/bpf/bpftool/Documentation/bpftool.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool.rst +@@ -1,3 +1,5 @@ ++.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++ + ================ + BPFTOOL + ================ +--- a/tools/bpf/bpftool/Documentation/common_options.rst ++++ b/tools/bpf/bpftool/Documentation/common_options.rst +@@ -1,3 +1,5 @@ ++.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++ + -h, --help + Print short help message (similar to **bpftool help**). + diff --git a/patches.suse/bpftool-Add-current-libbpf_strict-mode-to-version-ou.patch b/patches.suse/bpftool-Add-current-libbpf_strict-mode-to-version-ou.patch new file mode 100644 index 0000000..260ce10 --- /dev/null +++ b/patches.suse/bpftool-Add-current-libbpf_strict-mode-to-version-ou.patch @@ -0,0 +1,119 @@ +From: Stanislav Fomichev +Date: Mon, 15 Nov 2021 16:04:48 -0800 +Subject: bpftool: Add current libbpf_strict mode to version output +Patch-mainline: v5.17-rc1 +Git-commit: e47d0bf800e8d7f4de501987b2788c7f2ce22cd1 +References: jsc#PED-1368 + ++ bpftool --legacy --version +bpftool v5.15.0 +features: libbfd, skeletons ++ bpftool --version +bpftool v5.15.0 +features: libbfd, libbpf_strict, skeletons + ++ bpftool --legacy --help +Usage: bpftool [OPTIONS] OBJECT { COMMAND | help } + bpftool batch file FILE + bpftool version + + OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter } + OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy} | + {-V|--version} } ++ bpftool --help +Usage: bpftool [OPTIONS] OBJECT { COMMAND | help } + bpftool batch file FILE + bpftool version + + OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter } + OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy} | + {-V|--version} } + ++ bpftool --legacy +Usage: bpftool [OPTIONS] OBJECT { COMMAND | help } + bpftool batch file FILE + bpftool version + + OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter } + OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy} | + {-V|--version} } ++ bpftool +Usage: bpftool [OPTIONS] OBJECT { COMMAND | help } + bpftool batch file FILE + bpftool version + + OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter } + OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy} | + {-V|--version} } + ++ bpftool --legacy version +bpftool v5.15.0 +features: libbfd, skeletons ++ bpftool version +bpftool v5.15.0 +features: libbfd, libbpf_strict, skeletons + ++ bpftool --json --legacy version 
+{"version":"5.15.0","features":{"libbfd":true,"libbpf_strict":false,"skeletons":true}} ++ bpftool --json version +{"version":"5.15.0","features":{"libbfd":true,"libbpf_strict":true,"skeletons":true}} + +Suggested-by: Quentin Monnet +Signed-off-by: Stanislav Fomichev +Signed-off-by: Daniel Borkmann +Reviewed-by: Quentin Monnet +Link: https://lore.kernel.org/bpf/20211116000448.2918854-1-sdf@google.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/main.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +--- a/tools/bpf/bpftool/main.c ++++ b/tools/bpf/bpftool/main.c +@@ -93,6 +93,7 @@ static int do_version(int argc, char **a + jsonw_name(json_wtr, "features"); + jsonw_start_object(json_wtr); /* features */ + jsonw_bool_field(json_wtr, "libbfd", has_libbfd); ++ jsonw_bool_field(json_wtr, "libbpf_strict", !legacy_libbpf); + jsonw_bool_field(json_wtr, "skeletons", has_skeletons); + jsonw_end_object(json_wtr); /* features */ + +@@ -106,6 +107,10 @@ static int do_version(int argc, char **a + printf(" libbfd"); + nb_features++; + } ++ if (!legacy_libbpf) { ++ printf("%s libbpf_strict", nb_features++ ? "," : ""); ++ nb_features++; ++ } + if (has_skeletons) + printf("%s skeletons", nb_features++ ? "," : ""); + printf("\n"); +@@ -400,6 +405,7 @@ int main(int argc, char **argv) + { "legacy", no_argument, NULL, 'l' }, + { 0 } + }; ++ bool version_requested = false; + int opt, ret; + + last_do_help = do_help; +@@ -414,7 +420,8 @@ int main(int argc, char **argv) + options, NULL)) >= 0) { + switch (opt) { + case 'V': +- return do_version(argc, argv); ++ version_requested = true; ++ break; + case 'h': + return do_help(argc, argv); + case 'p': +@@ -479,6 +486,9 @@ int main(int argc, char **argv) + if (argc < 0) + usage(); + ++ if (version_requested) ++ return do_version(argc, argv); ++ + ret = cmd_select(cmds, argc, argv, do_help); + + if (json_output) diff --git a/patches.suse/bpftool-Add-debug-mode-for-gen_loader.patch b/patches.suse/bpftool-Add-debug-mode-for-gen_loader.patch new file mode 100644 index 0000000..dac9fcd --- /dev/null +++ b/patches.suse/bpftool-Add-debug-mode-for-gen_loader.patch @@ -0,0 +1,115 @@ +From: Alexei Starovoitov +Date: Sat, 4 Dec 2021 11:46:23 -0800 +Subject: bpftool: Add debug mode for gen_loader. +Patch-mainline: v5.17-rc1 +Git-commit: 942df4dc5ea159100466f198d8687a49c2359ca3 +References: jsc#PED-1368 + +Make -d flag functional for gen_loader style program loading. + +For example: +$ bpftool prog load -L -d test_d_path.o +... // will print: +libbpf: loading ./test_d_path.o +libbpf: elf: section(3) fentry/security_inode_getattr, size 280, link 0, flags 6, type=1 +... +libbpf: prog 'prog_close': found data map 0 (test_d_p.bss, sec 7, off 0) for insn 30 +libbpf: gen: load_btf: size 5376 +libbpf: gen: map_create: test_d_p.bss idx 0 type 2 value_type_id 118 +libbpf: map 'test_d_p.bss': created successfully, fd=0 +libbpf: gen: map_update_elem: idx 0 +libbpf: sec 'fentry/filp_close': found 1 CO-RE relocations +libbpf: record_relo_core: prog 1 insn[15] struct file 0:1 final insn_idx 15 +libbpf: gen: prog_load: type 26 insns_cnt 35 progi_idx 0 +libbpf: gen: find_attach_tgt security_inode_getattr 12 +libbpf: gen: prog_load: type 26 insns_cnt 37 progi_idx 1 +libbpf: gen: find_attach_tgt filp_close 12 +libbpf: gen: finish 0 +... // at this point libbpf finished generating loader program + 0: (bf) r6 = r1 + 1: (bf) r1 = r10 + 2: (07) r1 += -136 + 3: (b7) r2 = 136 + 4: (b7) r3 = 0 + 5: (85) call bpf_probe_read_kernel#113 + 6: (05) goto pc+104 +... 
// this is the assembly dump of the loader program + 390: (63) *(u32 *)(r6 +44) = r0 + 391: (18) r1 = map[idx:0]+5584 + 393: (61) r0 = *(u32 *)(r1 +0) + 394: (63) *(u32 *)(r6 +24) = r0 + 395: (b7) r0 = 0 + 396: (95) exit +err 0 // the loader program was loaded and executed successfully +(null) +func#0 @0 +... // CO-RE in the kernel logs: +CO-RE relocating STRUCT file: found target candidate [500] +prog '': relo #0: kind (0), spec is [8] STRUCT file.f_path (0:1 @ offset 16) +prog '': relo #0: matching candidate #0 [500] STRUCT file.f_path (0:1 @ offset 16) +prog '': relo #0: patched insn #15 (ALU/ALU64) imm 16 -> 16 +vmlinux_cand_cache:[11]file(500), +module_cand_cache: +... // verifier logs when it was checking test_d_path.o program: +R1 type=ctx expected=fp +0: R1=ctx(id=0,off=0,imm=0) R10=fp0 +; int BPF_PROG(prog_close, struct file *file, void *id) +0: (79) r6 = *(u64 *)(r1 +0) +func 'filp_close' arg0 has btf_id 500 type STRUCT 'file' +1: R1=ctx(id=0,off=0,imm=0) R6_w=ptr_file(id=0,off=0,imm=0) R10=fp0 +; pid_t pid = bpf_get_current_pid_tgid() >> 32; +1: (85) call bpf_get_current_pid_tgid#14 + +... // if there are multiple programs being loaded by the loader program +... // only the last program in the elf file will be printed, since +... // the same verifier log_buf is used for all PROG_LOAD commands. + +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211204194623.27779-1-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/prog.c | 20 +++++++++++--------- + 1 file changed, 11 insertions(+), 9 deletions(-) + +--- a/tools/bpf/bpftool/prog.c ++++ b/tools/bpf/bpftool/prog.c +@@ -1774,17 +1774,19 @@ static int try_loader(struct gen_loader_ + sizeof(struct bpf_prog_desc)); + int log_buf_sz = (1u << 24) - 1; + int err, fds_before, fd_delta; +- char *log_buf; ++ char *log_buf = NULL; + + ctx = alloca(ctx_sz); + memset(ctx, 0, ctx_sz); + ctx->sz = ctx_sz; +- ctx->log_level = 1; +- ctx->log_size = log_buf_sz; +- log_buf = malloc(log_buf_sz); +- if (!log_buf) +- return -ENOMEM; +- ctx->log_buf = (long) log_buf; ++ if (verifier_logs) { ++ ctx->log_level = 1 + 2 + 4; ++ ctx->log_size = log_buf_sz; ++ log_buf = malloc(log_buf_sz); ++ if (!log_buf) ++ return -ENOMEM; ++ ctx->log_buf = (long) log_buf; ++ } + opts.ctx = ctx; + opts.data = gen->data; + opts.data_sz = gen->data_sz; +@@ -1793,9 +1795,9 @@ static int try_loader(struct gen_loader_ + fds_before = count_open_fds(); + err = bpf_load_and_run(&opts); + fd_delta = count_open_fds() - fds_before; +- if (err < 0) { ++ if (err < 0 || verifier_logs) { + fprintf(stderr, "err %d\n%s\n%s", err, opts.errstr, log_buf); +- if (fd_delta) ++ if (fd_delta && err < 0) + fprintf(stderr, "loader prog leaked %d FDs\n", + fd_delta); + } diff --git a/patches.suse/bpftool-Enable-cross-building-with-clang.patch b/patches.suse/bpftool-Enable-cross-building-with-clang.patch new file mode 100644 index 0000000..5efa465 --- /dev/null +++ b/patches.suse/bpftool-Enable-cross-building-with-clang.patch @@ -0,0 +1,70 @@ +From: Jean-Philippe Brucker +Date: Thu, 16 Dec 2021 16:38:41 +0000 +Subject: bpftool: Enable cross-building with clang +Patch-mainline: v5.17-rc1 +Git-commit: bdadbb44c90aedaa74d46f1b113bd845774efa39 +References: jsc#PED-1368 + +Cross-building using clang requires passing the "-target" flag rather +than using the CROSS_COMPILE prefix. Makefile.include transforms +CROSS_COMPILE into CLANG_CROSS_FLAGS, and adds that to CFLAGS. 
Remove +the cross flags for the bootstrap bpftool, and erase the CROSS_COMPILE +flag for the bootstrap libbpf. + +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Andrii Nakryiko +Acked-by: Quentin Monnet +Link: https://lore.kernel.org/bpf/20211216163842.829836-5-jean-philippe@linaro.org +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/Makefile | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +--- a/tools/bpf/bpftool/Makefile ++++ b/tools/bpf/bpftool/Makefile +@@ -57,7 +57,7 @@ $(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_D + $(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT) + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ + DESTDIR=$(LIBBPF_BOOTSTRAP_DESTDIR) prefix= \ +- ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@ install_headers ++ ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) $@ install_headers + + $(LIBBPF_BOOTSTRAP_INTERNAL_HDRS): $(LIBBPF_BOOTSTRAP_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_BOOTSTRAP_HDRS_DIR) + $(call QUIET_INSTALL, $@) +@@ -152,6 +152,9 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT + SRCS += $(BFD_SRCS) + endif + ++HOST_CFLAGS = $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),\ ++ $(subst $(CLANG_CROSS_FLAGS),,$(CFLAGS))) ++ + BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool + + BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o disasm.o) +@@ -202,7 +205,7 @@ endif + CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS) + + $(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c +- $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD $< -o $@ ++ $(QUIET_CC)$(HOSTCC) $(HOST_CFLAGS) -c -MMD $< -o $@ + + $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c + $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@ +@@ -213,15 +216,13 @@ ifneq ($(feature-zlib), 1) + endif + + $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) +- $(QUIET_LINK)$(HOSTCC) $(CFLAGS) $(LDFLAGS) $(BOOTSTRAP_OBJS) $(LIBS_BOOTSTRAP) -o $@ ++ $(QUIET_LINK)$(HOSTCC) $(HOST_CFLAGS) $(LDFLAGS) $(BOOTSTRAP_OBJS) $(LIBS_BOOTSTRAP) -o $@ + + $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@ + + $(BOOTSTRAP_OUTPUT)%.o: %.c $(LIBBPF_BOOTSTRAP_INTERNAL_HDRS) | $(BOOTSTRAP_OUTPUT) +- $(QUIET_CC)$(HOSTCC) \ +- $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),$(CFLAGS)) \ +- -c -MMD $< -o $@ ++ $(QUIET_CC)$(HOSTCC) $(HOST_CFLAGS) -c -MMD $< -o $@ + + $(OUTPUT)%.o: %.c + $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@ diff --git a/patches.suse/bpftool-Enable-libbpf-s-strict-mode-by-default.patch b/patches.suse/bpftool-Enable-libbpf-s-strict-mode-by-default.patch new file mode 100644 index 0000000..e56ef72 --- /dev/null +++ b/patches.suse/bpftool-Enable-libbpf-s-strict-mode-by-default.patch @@ -0,0 +1,199 @@ +From: Stanislav Fomichev +Date: Wed, 10 Nov 2021 11:23:24 -0800 +Subject: bpftool: Enable libbpf's strict mode by default +Patch-mainline: v5.17-rc1 +Git-commit: 314f14abdeca78de6b16f97d796a9966ce4b90ae +References: jsc#PED-1368 + +Otherwise, attaching with bpftool doesn't work with strict section names. 
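+
+For example (an illustrative sketch, not part of this patch), strict mode
+only infers the program type when the ELF section name uses a prefix
+known to libbpf:
+
+	#include <linux/bpf.h>
+	#include <bpf/bpf_helpers.h>
+
+	char LICENSE[] SEC("license") = "GPL";
+
+	SEC("kprobe/try_to_wake_up")	/* recognized prefix: type inferred */
+	int handler(void *ctx)
+	{
+		return 0;
+	}
+
+A bare custom name such as SEC("myprobe") needs an explicit type on the
+command line, or the --legacy flag.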
+ +Also: + + - Add --legacy option to switch back to pre-1.0 behavior + - Print a warning when program fails to load in strict mode to + point to --legacy flag + - By default, don't append / to the section name; in strict + mode it's relevant only for a small subset of prog types + ++ bpftool --legacy prog loadall tools/testing/selftests/bpf/test_cgroup_link.o /sys/fs/bpf/kprobe type kprobe +libbpf: failed to pin program: File exists +Error: failed to pin all programs ++ bpftool prog loadall tools/testing/selftests/bpf/test_cgroup_link.o /sys/fs/bpf/kprobe type kprobe + +v1 -> v2: + - strict by default (Quentin Monnet) + - add more info to --legacy description (Quentin Monnet) + - add bash completion (Quentin Monnet) + +Signed-off-by: Stanislav Fomichev +Signed-off-by: Daniel Borkmann +Reviewed-by: Quentin Monnet +Link: https://lore.kernel.org/bpf/20211110192324.920934-1-sdf@google.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/Documentation/common_options.rst | 9 ++++ + tools/bpf/bpftool/bash-completion/bpftool | 2 - + tools/bpf/bpftool/main.c | 13 ++++++ + tools/bpf/bpftool/main.h | 3 + + tools/bpf/bpftool/prog.c | 40 ++++++++++++--------- + 5 files changed, 48 insertions(+), 19 deletions(-) + +--- a/tools/bpf/bpftool/Documentation/common_options.rst ++++ b/tools/bpf/bpftool/Documentation/common_options.rst +@@ -20,3 +20,12 @@ + Print all logs available, even debug-level information. This includes + logs from libbpf as well as from the verifier, when attempting to + load programs. ++ ++-l, --legacy ++ Use legacy libbpf mode which has more relaxed BPF program ++ requirements. By default, bpftool has more strict requirements ++ about section names, changes pinning logic and doesn't support ++ some of the older non-BTF map declarations. ++ ++ See https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0 ++ for details. 
+--- a/tools/bpf/bpftool/bash-completion/bpftool ++++ b/tools/bpf/bpftool/bash-completion/bpftool +@@ -261,7 +261,7 @@ _bpftool() + # Deal with options + if [[ ${words[cword]} == -* ]]; then + local c='--version --json --pretty --bpffs --mapcompat --debug \ +- --use-loader --base-btf' ++ --use-loader --base-btf --legacy' + COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) + return 0 + fi +--- a/tools/bpf/bpftool/main.c ++++ b/tools/bpf/bpftool/main.c +@@ -31,6 +31,7 @@ bool block_mount; + bool verifier_logs; + bool relaxed_maps; + bool use_loader; ++bool legacy_libbpf; + struct btf *base_btf; + struct hashmap *refs_table; + +@@ -396,6 +397,7 @@ int main(int argc, char **argv) + { "debug", no_argument, NULL, 'd' }, + { "use-loader", no_argument, NULL, 'L' }, + { "base-btf", required_argument, NULL, 'B' }, ++ { "legacy", no_argument, NULL, 'l' }, + { 0 } + }; + int opt, ret; +@@ -408,7 +410,7 @@ int main(int argc, char **argv) + bin_name = argv[0]; + + opterr = 0; +- while ((opt = getopt_long(argc, argv, "VhpjfLmndB:", ++ while ((opt = getopt_long(argc, argv, "VhpjfLmndB:l", + options, NULL)) >= 0) { + switch (opt) { + case 'V': +@@ -454,6 +456,9 @@ int main(int argc, char **argv) + case 'L': + use_loader = true; + break; ++ case 'l': ++ legacy_libbpf = true; ++ break; + default: + p_err("unrecognized option '%s'", argv[optind - 1]); + if (json_output) +@@ -463,6 +468,12 @@ int main(int argc, char **argv) + } + } + ++ if (!legacy_libbpf) { ++ ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL); ++ if (ret) ++ p_err("failed to enable libbpf strict mode: %d", ret); ++ } ++ + argc -= optind; + argv += optind; + if (argc < 0) +--- a/tools/bpf/bpftool/main.h ++++ b/tools/bpf/bpftool/main.h +@@ -57,7 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr + #define HELP_SPEC_PROGRAM \ + "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }" + #define HELP_SPEC_OPTIONS \ +- "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}" ++ "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}" + #define HELP_SPEC_MAP \ + "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }" + #define HELP_SPEC_LINK \ +@@ -90,6 +90,7 @@ extern bool block_mount; + extern bool verifier_logs; + extern bool relaxed_maps; + extern bool use_loader; ++extern bool legacy_libbpf; + extern struct btf *base_btf; + extern struct hashmap *refs_table; + +--- a/tools/bpf/bpftool/prog.c ++++ b/tools/bpf/bpftool/prog.c +@@ -1483,8 +1483,6 @@ static int load_with_options(int argc, c + + while (argc) { + if (is_prefix(*argv, "type")) { +- char *type; +- + NEXT_ARG(); + + if (common_prog_type != BPF_PROG_TYPE_UNSPEC) { +@@ -1494,21 +1492,26 @@ static int load_with_options(int argc, c + if (!REQ_ARGS(1)) + goto err_free_reuse_maps; + +- /* Put a '/' at the end of type to appease libbpf */ +- type = malloc(strlen(*argv) + 2); +- if (!type) { +- p_err("mem alloc failed"); +- goto err_free_reuse_maps; +- } +- *type = 0; +- strcat(type, *argv); +- strcat(type, "/"); ++ err = libbpf_prog_type_by_name(*argv, &common_prog_type, ++ &expected_attach_type); ++ if (err < 0) { ++ /* Put a '/' at the end of type to appease libbpf */ ++ char *type = malloc(strlen(*argv) + 2); + +- err = get_prog_type_by_name(type, &common_prog_type, +- &expected_attach_type); +- free(type); +- if (err < 0) +- goto err_free_reuse_maps; ++ if (!type) { ++ p_err("mem alloc failed"); ++ goto err_free_reuse_maps; ++ } ++ *type = 0; ++ strcat(type, *argv); ++ strcat(type, "/"); ++ ++ err = get_prog_type_by_name(type, &common_prog_type, ++ 
&expected_attach_type); ++ free(type); ++ if (err < 0) ++ goto err_free_reuse_maps; ++ } + + NEXT_ARG(); + } else if (is_prefix(*argv, "map")) { +@@ -1731,6 +1734,11 @@ err_unpin: + else + bpf_object__unpin_programs(obj, pinfile); + err_close_obj: ++ if (!legacy_libbpf) { ++ p_info("Warning: bpftool is now running in libbpf strict mode and has more stringent requirements about BPF programs.\n" ++ "If it used to work for this object file but now doesn't, see --legacy option for more details.\n"); ++ } ++ + bpf_object__close(obj); + err_free_reuse_maps: + for (i = 0; i < old_map_fds; i++) diff --git a/patches.suse/bpftool-Enable-line-buffering-for-stdout.patch b/patches.suse/bpftool-Enable-line-buffering-for-stdout.patch new file mode 100644 index 0000000..2c63628 --- /dev/null +++ b/patches.suse/bpftool-Enable-line-buffering-for-stdout.patch @@ -0,0 +1,34 @@ +From: Paul Chaignon +Date: Mon, 20 Dec 2021 22:45:28 +0100 +Subject: bpftool: Enable line buffering for stdout +Patch-mainline: v5.17-rc1 +Git-commit: 1a1a0b0364ad291bd8e509da104ac8b5b1afec5d +References: jsc#PED-1368 + +The output of bpftool prog tracelog is currently buffered, which is +inconvenient when piping the output into other commands. A simple +tracelog | grep will typically not display anything. This patch fixes it +by enabling line buffering on stdout for the whole bpftool binary. + +Fixes: 30da46b5dc3a ("tools: bpftool: add a command to dump the trace pipe") +Signed-off-by: Quentin Monnet +Signed-off-by: Paul Chaignon +Signed-off-by: Andrii Nakryiko +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20211220214528.GA11706@Mem +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/main.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/tools/bpf/bpftool/main.c ++++ b/tools/bpf/bpftool/main.c +@@ -408,6 +408,8 @@ int main(int argc, char **argv) + bool version_requested = false; + int opt, ret; + ++ setlinebuf(stdout); ++ + last_do_help = do_help; + pretty_output = false; + json_output = false; diff --git a/patches.suse/bpftool-Fix-SPDX-tag-for-Makefiles-and-.gitignore.patch b/patches.suse/bpftool-Fix-SPDX-tag-for-Makefiles-and-.gitignore.patch new file mode 100644 index 0000000..9195883 --- /dev/null +++ b/patches.suse/bpftool-Fix-SPDX-tag-for-Makefiles-and-.gitignore.patch @@ -0,0 +1,57 @@ +From: Quentin Monnet +Date: Fri, 5 Nov 2021 22:19:04 +0000 +Subject: bpftool: Fix SPDX tag for Makefiles and .gitignore +Patch-mainline: v5.17-rc1 +Git-commit: 1a8b597ddabe7dc25aa9defd33949d455ee9cde8 +References: jsc#PED-1368 + +Bpftool is dual-licensed under GPLv2 and BSD-2-Clause. In commit +907b22365115 ("tools: bpftool: dual license all files") we made sure +that all its source files were indeed covered by the two licenses, and +that they had the correct SPDX tags. + +However, bpftool's Makefile, the Makefile for its documentation, and the +.gitignore file were skipped at the time (their GPL-2.0-only tag was +added later). Let's update the tags. 
+ +Signed-off-by: Quentin Monnet +Signed-off-by: Alexei Starovoitov +Acked-by: Andrii Nakryiko +Acked-by: Tobias Klauser +Acked-by: Joe Stringer +Acked-by: Song Liu +Acked-by: Jean-Philippe Brucker +Acked-by: Jesper Dangaard Brouer +Acked-by: Jakub Kicinski +Link: https://lore.kernel.org/bpf/20211105221904.3536-1-quentin@isovalent.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/.gitignore | 2 +- + tools/bpf/bpftool/Documentation/Makefile | 2 +- + tools/bpf/bpftool/Makefile | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +--- a/tools/bpf/bpftool/.gitignore ++++ b/tools/bpf/bpftool/.gitignore +@@ -1,4 +1,4 @@ +-# SPDX-License-Identifier: GPL-2.0-only ++# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + *.d + /bootstrap/ + /bpftool +--- a/tools/bpf/bpftool/Documentation/Makefile ++++ b/tools/bpf/bpftool/Documentation/Makefile +@@ -1,4 +1,4 @@ +-# SPDX-License-Identifier: GPL-2.0-only ++# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + include ../../../scripts/Makefile.include + include ../../../scripts/utilities.mak + +--- a/tools/bpf/bpftool/Makefile ++++ b/tools/bpf/bpftool/Makefile +@@ -1,4 +1,4 @@ +-# SPDX-License-Identifier: GPL-2.0-only ++# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + include ../../scripts/Makefile.include + include ../../scripts/utilities.mak + diff --git a/patches.suse/bpftool-Fix-indent-in-option-lists-in-the-documentat.patch b/patches.suse/bpftool-Fix-indent-in-option-lists-in-the-documentat.patch new file mode 100644 index 0000000..7b81c51 --- /dev/null +++ b/patches.suse/bpftool-Fix-indent-in-option-lists-in-the-documentat.patch @@ -0,0 +1,136 @@ +From: Quentin Monnet +Date: Wed, 10 Nov 2021 11:46:30 +0000 +Subject: bpftool: Fix indent in option lists in the documentation +Patch-mainline: v5.17-rc1 +Git-commit: 986dec18bbf41f50edc2e0aa4ac5ef8e0f64f328 +References: jsc#PED-1368 + +Mixed indentation levels in the lists of options in bpftool's +documentation produce some unexpected results. For the "bpftool" man +page, it prints a warning: + + $ make -C bpftool.8 + GEN bpftool.8 + :26: (ERROR/3) Unexpected indentation. + +For other pages, there is no warning, but it results in a line break +appearing in the option lists in the generated man pages. + +RST paragraphs should have a uniform indentation level. Let's fix it.
+ +Fixes: c07ba629df97 ("tools: bpftool: Update and synchronise option list in doc and help msg") +Fixes: 8cc8c6357c8f ("tools: bpftool: Document and add bash completion for -L, -B options") +Signed-off-by: Quentin Monnet +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211110114632.24537-5-quentin@isovalent.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/Documentation/bpftool-btf.rst | 2 +- + tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 2 +- + tools/bpf/bpftool/Documentation/bpftool-gen.rst | 2 +- + tools/bpf/bpftool/Documentation/bpftool-link.rst | 2 +- + tools/bpf/bpftool/Documentation/bpftool-map.rst | 6 +++--- + tools/bpf/bpftool/Documentation/bpftool-prog.rst | 8 ++++---- + tools/bpf/bpftool/Documentation/bpftool.rst | 6 +++--- + 7 files changed, 14 insertions(+), 14 deletions(-) + +--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst +@@ -13,7 +13,7 @@ SYNOPSIS + **bpftool** [*OPTIONS*] **btf** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | {**-d** | **--debug** } | +- { **-B** | **--base-btf** } } ++ { **-B** | **--base-btf** } } + + *COMMANDS* := { **dump** | **help** } + +--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +@@ -13,7 +13,7 @@ SYNOPSIS + **bpftool** [*OPTIONS*] **cgroup** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | +- { **-f** | **--bpffs** } } ++ { **-f** | **--bpffs** } } + + *COMMANDS* := + { **show** | **list** | **tree** | **attach** | **detach** | **help** } +--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst +@@ -13,7 +13,7 @@ SYNOPSIS + **bpftool** [*OPTIONS*] **gen** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | +- { **-L** | **--use-loader** } } ++ { **-L** | **--use-loader** } } + + *COMMAND* := { **object** | **skeleton** | **help** } + +--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst +@@ -13,7 +13,7 @@ SYNOPSIS + **bpftool** [*OPTIONS*] **link** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | +- { **-f** | **--bpffs** } | { **-n** | **--nomount** } } ++ { **-f** | **--bpffs** } | { **-n** | **--nomount** } } + + *COMMANDS* := { **show** | **list** | **pin** | **help** } + +--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst +@@ -13,11 +13,11 @@ SYNOPSIS + **bpftool** [*OPTIONS*] **map** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | +- { **-f** | **--bpffs** } | { **-n** | **--nomount** } } ++ { **-f** | **--bpffs** } | { **-n** | **--nomount** } } + + *COMMANDS* := +- { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** +- | **delete** | **pin** | **help** } ++ { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | ++ **delete** | **pin** | **help** } + + MAP COMMANDS + ============= +--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst +@@ -13,12 +13,12 @@ SYNOPSIS + **bpftool** [*OPTIONS*] **prog** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | +- { **-f** 
| **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } | +- { **-L** | **--use-loader** } } ++ { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } | ++ { **-L** | **--use-loader** } } + + *COMMANDS* := +- { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** +- | **loadall** | **help** } ++ { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | ++ **loadall** | **help** } + + PROG COMMANDS + ============= +--- a/tools/bpf/bpftool/Documentation/bpftool.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool.rst +@@ -19,14 +19,14 @@ SYNOPSIS + *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** } + + *OPTIONS* := { { **-V** | **--version** } | +- { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } ++ { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } + + *MAP-COMMANDS* := + { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | +- **delete** | **pin** | **event_pipe** | **help** } ++ **delete** | **pin** | **event_pipe** | **help** } + + *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** | +- **load** | **attach** | **detach** | **help** } ++ **load** | **attach** | **detach** | **help** } + + *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } + diff --git a/patches.suse/bpftool-Fix-mixed-indentation-in-documentation.patch b/patches.suse/bpftool-Fix-mixed-indentation-in-documentation.patch new file mode 100644 index 0000000..47c3e84 --- /dev/null +++ b/patches.suse/bpftool-Fix-mixed-indentation-in-documentation.patch @@ -0,0 +1,128 @@ +From: Quentin Monnet +Date: Wed, 10 Nov 2021 11:46:32 +0000 +Subject: bpftool: Fix mixed indentation in documentation +Patch-mainline: v5.17-rc1 +Git-commit: b06be5651f08bc5bc305e2a3c722ddb33b783ee5 +References: jsc#PED-1368 + +Some paragraphs in bpftool's documentation have a mix of tabs and spaces +for indentation. Let's make it consistent. + +This patch brings no change to the text content. 
+ +Signed-off-by: Quentin Monnet +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211110114632.24537-7-quentin@isovalent.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 10 +-- + tools/bpf/bpftool/Documentation/bpftool-net.rst | 66 ++++++++++----------- + 2 files changed, 38 insertions(+), 38 deletions(-) + +--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +@@ -30,9 +30,9 @@ CGROUP COMMANDS + | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } + | *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** | + | **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** | +-| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** | +-| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** | +-| **sock_release** } ++| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** | ++| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** | ++| **sock_release** } + | *ATTACH_FLAGS* := { **multi** | **override** } + + DESCRIPTION +@@ -98,9 +98,9 @@ DESCRIPTION + **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an + unconnected udp6 socket (since 4.18); + **recvmsg4** call to recvfrom(2), recvmsg(2), recvmmsg(2) for +- an unconnected udp4 socket (since 5.2); ++ an unconnected udp4 socket (since 5.2); + **recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for +- an unconnected udp6 socket (since 5.2); ++ an unconnected udp6 socket (since 5.2); + **sysctl** sysctl access (since 5.2); + **getsockopt** call to getsockopt (since 5.3); + **setsockopt** call to setsockopt (since 5.3); +--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst +@@ -31,44 +31,44 @@ NET COMMANDS + DESCRIPTION + =========== + **bpftool net { show | list }** [ **dev** *NAME* ] +- List bpf program attachments in the kernel networking subsystem. ++ List bpf program attachments in the kernel networking subsystem. + +- Currently, only device driver xdp attachments and tc filter +- classification/action attachments are implemented, i.e., for +- program types **BPF_PROG_TYPE_SCHED_CLS**, +- **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**. +- For programs attached to a particular cgroup, e.g., +- **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**, +- **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, +- users can use **bpftool cgroup** to dump cgroup attachments. +- For sk_{filter, skb, msg, reuseport} and lwt/seg6 +- bpf programs, users should consult other tools, e.g., iproute2. +- +- The current output will start with all xdp program attachments, followed by +- all tc class/qdisc bpf program attachments. Both xdp programs and +- tc programs are ordered based on ifindex number. If multiple bpf +- programs attached to the same networking device through **tc filter**, +- the order will be first all bpf programs attached to tc classes, then +- all bpf programs attached to non clsact qdiscs, and finally all +- bpf programs attached to root and clsact qdisc. ++ Currently, only device driver xdp attachments and tc filter ++ classification/action attachments are implemented, i.e., for ++ program types **BPF_PROG_TYPE_SCHED_CLS**, ++ **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**. 
++ For programs attached to a particular cgroup, e.g., ++ **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**, ++ **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, ++ users can use **bpftool cgroup** to dump cgroup attachments. ++ For sk_{filter, skb, msg, reuseport} and lwt/seg6 ++ bpf programs, users should consult other tools, e.g., iproute2. ++ ++ The current output will start with all xdp program attachments, followed by ++ all tc class/qdisc bpf program attachments. Both xdp programs and ++ tc programs are ordered based on ifindex number. If multiple bpf ++ programs attached to the same networking device through **tc filter**, ++ the order will be first all bpf programs attached to tc classes, then ++ all bpf programs attached to non clsact qdiscs, and finally all ++ bpf programs attached to root and clsact qdisc. + + **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ] +- Attach bpf program *PROG* to network interface *NAME* with +- type specified by *ATTACH_TYPE*. Previously attached bpf program +- can be replaced by the command used with **overwrite** option. +- Currently, only XDP-related modes are supported for *ATTACH_TYPE*. +- +- *ATTACH_TYPE* can be of: +- **xdp** - try native XDP and fallback to generic XDP if NIC driver does not support it; +- **xdpgeneric** - Generic XDP. runs at generic XDP hook when packet already enters receive path as skb; +- **xdpdrv** - Native XDP. runs earliest point in driver's receive path; +- **xdpoffload** - Offload XDP. runs directly on NIC on each packet reception; ++ Attach bpf program *PROG* to network interface *NAME* with ++ type specified by *ATTACH_TYPE*. Previously attached bpf program ++ can be replaced by the command used with **overwrite** option. ++ Currently, only XDP-related modes are supported for *ATTACH_TYPE*. ++ ++ *ATTACH_TYPE* can be of: ++ **xdp** - try native XDP and fallback to generic XDP if NIC driver does not support it; ++ **xdpgeneric** - Generic XDP. runs at generic XDP hook when packet already enters receive path as skb; ++ **xdpdrv** - Native XDP. runs earliest point in driver's receive path; ++ **xdpoffload** - Offload XDP. runs directly on NIC on each packet reception; + + **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME* +- Detach bpf program attached to network interface *NAME* with +- type specified by *ATTACH_TYPE*. To detach bpf program, same +- *ATTACH_TYPE* previously used for attach must be specified. +- Currently, only XDP-related modes are supported for *ATTACH_TYPE*. ++ Detach bpf program attached to network interface *NAME* with ++ type specified by *ATTACH_TYPE*. To detach bpf program, same ++ *ATTACH_TYPE* previously used for attach must be specified. ++ Currently, only XDP-related modes are supported for *ATTACH_TYPE*. + + **bpftool net help** + Print short help message. diff --git a/patches.suse/bpftool-Migrate-1-err-checks-of-libbpf-fn-calls.patch b/patches.suse/bpftool-Migrate-1-err-checks-of-libbpf-fn-calls.patch new file mode 100644 index 0000000..a653651 --- /dev/null +++ b/patches.suse/bpftool-Migrate-1-err-checks-of-libbpf-fn-calls.patch @@ -0,0 +1,46 @@ +From: Dave Marchevsky +Date: Mon, 1 Nov 2021 15:43:54 -0700 +Subject: bpftool: Migrate -1 err checks of libbpf fn calls +Patch-mainline: v5.17-rc1 +Git-commit: 60f270753960291895cdd07d360c4e09c56c4596 +References: jsc#PED-1368 + +Per [0], callers of libbpf functions with LIBBPF_STRICT_DIRECT_ERRS set +should handle negative error codes of various values (e.g. -EINVAL). 
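+
+Concretely, the check becomes (illustrative sketch):
+
+	int fd = bpf_prog_get_fd_by_id(prog_id);
+
+	if (fd < 0)	/* was: fd == -1; may now be e.g. -ENOENT */
+		return fd;
+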
+Migrate two callsites which were explicitly checking for -1 only to +handle the new scheme. + + [0]: https://github.com/libbpf/libbpf/wiki/Libbpf-1.0-migration-guide#direct-error-code-returning-libbpf_strict_direct_errs + +Signed-off-by: Dave Marchevsky +Signed-off-by: Andrii Nakryiko +Acked-by: Quentin Monnet +Link: https://lore.kernel.org/bpf/20211101224357.2651181-2-davemarchevsky@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/btf_dumper.c | 2 +- + tools/bpf/bpftool/struct_ops.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/tools/bpf/bpftool/btf_dumper.c ++++ b/tools/bpf/bpftool/btf_dumper.c +@@ -52,7 +52,7 @@ static int dump_prog_id_as_func_ptr(cons + + /* Get the bpf_prog's name. Obtain from func_info. */ + prog_fd = bpf_prog_get_fd_by_id(prog_id); +- if (prog_fd == -1) ++ if (prog_fd < 0) + goto print; + + prog_info = bpf_program__get_prog_info_linear(prog_fd, +--- a/tools/bpf/bpftool/struct_ops.c ++++ b/tools/bpf/bpftool/struct_ops.c +@@ -252,7 +252,7 @@ static struct res do_one_id(const char * + } + + fd = bpf_map_get_fd_by_id(id); +- if (fd == -1) { ++ if (fd < 0) { + p_err("can't get map by id (%lu): %s", id, strerror(errno)); + res.nr_errs++; + return res; diff --git a/patches.suse/bpftool-Migrate-off-of-deprecated-bpf_create_map_xat.patch b/patches.suse/bpftool-Migrate-off-of-deprecated-bpf_create_map_xat.patch new file mode 100644 index 0000000..7187695 --- /dev/null +++ b/patches.suse/bpftool-Migrate-off-of-deprecated-bpf_create_map_xat.patch @@ -0,0 +1,86 @@ +From: Andrii Nakryiko +Date: Wed, 1 Dec 2021 15:28:18 -0800 +Subject: bpftool: Migrate off of deprecated bpf_create_map_xattr() API +Patch-mainline: v5.17-rc1 +Git-commit: a15d408b839af421fba0a2ff6df193c13ef753d4 +References: jsc#PED-1368 + +Switch to bpf_map_create() API instead. 
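As a standalone sketch of the replacement API (the map type, name and sizes below are illustrative only, not taken from bpftool):

    #include <bpf/bpf.h>

    /* Create a small array map with the non-deprecated bpf_map_create().
     * Extra attributes, e.g. .map_flags, would go into the opts struct.
     */
    int create_example_map(void)
    {
            LIBBPF_OPTS(bpf_map_create_opts, opts);

            return bpf_map_create(BPF_MAP_TYPE_ARRAY, "example_map",
                                  sizeof(__u32), sizeof(__u64), 16, &opts);
    }

On success the call returns a map FD, on failure a negative error code.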
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211201232824.3166325-4-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/map.c | 23 +++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +--- a/tools/bpf/bpftool/map.c ++++ b/tools/bpf/bpftool/map.c +@@ -1261,7 +1261,10 @@ static int do_pin(int argc, char **argv) + + static int do_create(int argc, char **argv) + { +- struct bpf_create_map_attr attr = { NULL, }; ++ LIBBPF_OPTS(bpf_map_create_opts, attr); ++ enum bpf_map_type map_type = BPF_MAP_TYPE_UNSPEC; ++ __u32 key_size = 0, value_size = 0, max_entries = 0; ++ const char *map_name = NULL; + const char *pinfile; + int err = -1, fd; + +@@ -1276,30 +1279,30 @@ static int do_create(int argc, char **ar + if (is_prefix(*argv, "type")) { + NEXT_ARG(); + +- if (attr.map_type) { ++ if (map_type) { + p_err("map type already specified"); + goto exit; + } + +- attr.map_type = map_type_from_str(*argv); +- if ((int)attr.map_type < 0) { ++ map_type = map_type_from_str(*argv); ++ if ((int)map_type < 0) { + p_err("unrecognized map type: %s", *argv); + goto exit; + } + NEXT_ARG(); + } else if (is_prefix(*argv, "name")) { + NEXT_ARG(); +- attr.name = GET_ARG(); ++ map_name = GET_ARG(); + } else if (is_prefix(*argv, "key")) { +- if (parse_u32_arg(&argc, &argv, &attr.key_size, ++ if (parse_u32_arg(&argc, &argv, &key_size, + "key size")) + goto exit; + } else if (is_prefix(*argv, "value")) { +- if (parse_u32_arg(&argc, &argv, &attr.value_size, ++ if (parse_u32_arg(&argc, &argv, &value_size, + "value size")) + goto exit; + } else if (is_prefix(*argv, "entries")) { +- if (parse_u32_arg(&argc, &argv, &attr.max_entries, ++ if (parse_u32_arg(&argc, &argv, &max_entries, + "max entries")) + goto exit; + } else if (is_prefix(*argv, "flags")) { +@@ -1340,14 +1343,14 @@ static int do_create(int argc, char **ar + } + } + +- if (!attr.name) { ++ if (!map_name) { + p_err("map name not specified"); + goto exit; + } + + set_max_rlimit(); + +- fd = bpf_create_map_xattr(&attr); ++ fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, &attr); + if (fd < 0) { + p_err("map create failed: %s", strerror(errno)); + goto exit; diff --git a/patches.suse/bpftool-Normalize-compile-rules-to-specify-output-fi.patch b/patches.suse/bpftool-Normalize-compile-rules-to-specify-output-fi.patch new file mode 100644 index 0000000..4649d1e --- /dev/null +++ b/patches.suse/bpftool-Normalize-compile-rules-to-specify-output-fi.patch @@ -0,0 +1,69 @@ +From: Andrii Nakryiko +Date: Wed, 10 Nov 2021 21:36:16 -0800 +Subject: bpftool: Normalize compile rules to specify output file last +Patch-mainline: v5.17-rc1 +Git-commit: 6501182c08f76e4118177b42614174547e1bd149 +References: jsc#PED-1368 + +When dealing with verbose Makefile output, it's extremely confusing when +compiler invocation commands don't specify -o as the last +argument. Normalize bpftool's Makefile to do just that, as most other +BPF-related Makefiles are already doing that. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211111053624.190580-2-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/Makefile | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +--- a/tools/bpf/bpftool/Makefile ++++ b/tools/bpf/bpftool/Makefile +@@ -187,7 +187,8 @@ $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUT + -I$(if $(OUTPUT),$(OUTPUT),.) 
\ + -I$(srctree)/tools/include/uapi/ \ + -I$(LIBBPF_BOOTSTRAP_INCLUDE) \ +- -g -O2 -Wall -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@ ++ -g -O2 -Wall -target bpf -c $< -o $@ ++ $(Q)$(LLVM_STRIP) -g $@ + + $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) + $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@ +@@ -202,10 +203,10 @@ endif + CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS) + + $(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c +- $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $< ++ $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD $< -o $@ + + $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c +- $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< ++ $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@ + + $(OUTPUT)feature.o: + ifneq ($(feature-zlib), 1) +@@ -213,19 +214,18 @@ ifneq ($(feature-zlib), 1) + endif + + $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) +- $(QUIET_LINK)$(HOSTCC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) \ +- $(LIBS_BOOTSTRAP) ++ $(QUIET_LINK)$(HOSTCC) $(CFLAGS) $(LDFLAGS) $(BOOTSTRAP_OBJS) $(LIBS_BOOTSTRAP) -o $@ + + $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) +- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) ++ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@ + + $(BOOTSTRAP_OUTPUT)%.o: %.c $(LIBBPF_BOOTSTRAP_INTERNAL_HDRS) | $(BOOTSTRAP_OUTPUT) + $(QUIET_CC)$(HOSTCC) \ + $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),$(CFLAGS)) \ +- -c -MMD -o $@ $< ++ -c -MMD $< -o $@ + + $(OUTPUT)%.o: %.c +- $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< ++ $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@ + + feature-detect-clean: + $(call QUIET_CLEAN, feature-detect) diff --git a/patches.suse/bpftool-Probe-for-bounded-loop-support.patch b/patches.suse/bpftool-Probe-for-bounded-loop-support.patch new file mode 100644 index 0000000..c045bc3 --- /dev/null +++ b/patches.suse/bpftool-Probe-for-bounded-loop-support.patch @@ -0,0 +1,73 @@ +From: Paul Chaignon +Date: Tue, 4 Jan 2022 18:59:57 +0100 +Subject: bpftool: Probe for bounded loop support +Patch-mainline: v5.17-rc1 +Git-commit: c04fb2b0bd9275969be3b0a95f9c3ef76b1bfb73 +References: jsc#PED-1368 + +This patch introduces a new probe to check whether the verifier supports +bounded loops as introduced in commit 2589726d12a1 ("bpf: introduce +bounded loops"). This patch will allow BPF users such as Cilium to probe +for loop support on startup and only unconditionally unroll loops on +older kernels. + +The results are displayed as part of the miscellaneous section, as shown +below. + + $ bpftool feature probe | grep loops + Bounded loop support is available + $ bpftool feature probe macro | grep LOOPS + #define HAVE_BOUNDED_LOOPS + $ bpftool feature probe -j | jq .misc + { + "have_large_insn_limit": true, + "have_bounded_loops": true + } + +Signed-off-by: Paul Chaignon +Signed-off-by: Daniel Borkmann +Reviewed-by: Quentin Monnet +Link: https://lore.kernel.org/bpf/f7807c0b27d79f48e71de7b5a99c680ca4bd0151.1641314075.git.paul@isovalent.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/feature.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +--- a/tools/bpf/bpftool/feature.c ++++ b/tools/bpf/bpftool/feature.c +@@ -687,6 +687,27 @@ static void probe_large_insn_limit(const + "LARGE_INSN_LIMIT"); + } + ++/* ++ * Probe for bounded loop support introduced in commit 2589726d12a1 ++ * ("bpf: introduce bounded loops"). 
++ */ ++static void ++probe_bounded_loops(const char *define_prefix, __u32 ifindex) ++{ ++ struct bpf_insn insns[4] = { ++ BPF_MOV64_IMM(BPF_REG_0, 10), ++ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1), ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, -2), ++ BPF_EXIT_INSN() ++ }; ++ ++ probe_misc_feature(insns, ARRAY_SIZE(insns), ++ define_prefix, ifindex, ++ "have_bounded_loops", ++ "Bounded loop support", ++ "BOUNDED_LOOPS"); ++} ++ + static void + section_system_config(enum probe_component target, const char *define_prefix) + { +@@ -801,6 +822,7 @@ static void section_misc(const char *def + "/*** eBPF misc features ***/", + define_prefix); + probe_large_insn_limit(define_prefix, ifindex); ++ probe_bounded_loops(define_prefix, ifindex); + print_end_section(); + } + diff --git a/patches.suse/bpftool-Probe-for-instruction-set-extensions.patch b/patches.suse/bpftool-Probe-for-instruction-set-extensions.patch new file mode 100644 index 0000000..7c4ccb7 --- /dev/null +++ b/patches.suse/bpftool-Probe-for-instruction-set-extensions.patch @@ -0,0 +1,92 @@ +From: Paul Chaignon +Date: Tue, 4 Jan 2022 19:00:13 +0100 +Subject: bpftool: Probe for instruction set extensions +Patch-mainline: v5.17-rc1 +Git-commit: 0fd800b2456cf90ed738a1260b53acaa8843b5ae +References: jsc#PED-1368 + +This patch introduces new probes to check whether the kernel supports +instruction set extensions v2 and v3. The first introduced eBPF +instructions BPF_J{LT,LE,SLT,SLE} in commit 92b31a9af73b ("bpf: add +BPF_J{LT,LE,SLT,SLE} instructions"). The second introduces 32-bit +variants of all jump instructions in commit 092ed0968bb6 ("bpf: +verifier support JMP32"). + +These probes are useful for userspace BPF projects that want to use newer +instruction set extensions on newer kernels, to reduce the programs' +sizes or their complexity. LLVM already provides an mcpu=probe option to +automatically probe the kernel and select the newest-supported +instruction set extension. That is however not flexible enough for all +use cases. For example, in Cilium, we only want to use the v3 +instruction set extension on v5.10+, even though it is supported on all +kernels v5.1+. + +Signed-off-by: Paul Chaignon +Signed-off-by: Daniel Borkmann +Reviewed-by: Quentin Monnet +Link: https://lore.kernel.org/bpf/3bfedcd9898c1f41ac67ca61f144fec84c6c3a92.1641314075.git.paul@isovalent.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/feature.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 44 insertions(+) + +--- a/tools/bpf/bpftool/feature.c ++++ b/tools/bpf/bpftool/feature.c +@@ -708,6 +708,48 @@ probe_bounded_loops(const char *define_p + "BOUNDED_LOOPS"); + } + ++/* ++ * Probe for the v2 instruction set extension introduced in commit 92b31a9af73b ++ * ("bpf: add BPF_J{LT,LE,SLT,SLE} instructions"). ++ */ ++static void ++probe_v2_isa_extension(const char *define_prefix, __u32 ifindex) ++{ ++ struct bpf_insn insns[4] = { ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 0, 1), ++ BPF_MOV64_IMM(BPF_REG_0, 1), ++ BPF_EXIT_INSN() ++ }; ++ ++ probe_misc_feature(insns, ARRAY_SIZE(insns), ++ define_prefix, ifindex, ++ "have_v2_isa_extension", ++ "ISA extension v2", ++ "V2_ISA_EXTENSION"); ++} ++ ++/* ++ * Probe for the v3 instruction set extension introduced in commit 092ed0968bb6 ++ * ("bpf: verifier support JMP32"). 
++ */ ++static void ++probe_v3_isa_extension(const char *define_prefix, __u32 ifindex) ++{ ++ struct bpf_insn insns[4] = { ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 0, 1), ++ BPF_MOV64_IMM(BPF_REG_0, 1), ++ BPF_EXIT_INSN() ++ }; ++ ++ probe_misc_feature(insns, ARRAY_SIZE(insns), ++ define_prefix, ifindex, ++ "have_v3_isa_extension", ++ "ISA extension v3", ++ "V3_ISA_EXTENSION"); ++} ++ + static void + section_system_config(enum probe_component target, const char *define_prefix) + { +@@ -823,6 +865,8 @@ static void section_misc(const char *def + define_prefix); + probe_large_insn_limit(define_prefix, ifindex); + probe_bounded_loops(define_prefix, ifindex); ++ probe_v2_isa_extension(define_prefix, ifindex); ++ probe_v3_isa_extension(define_prefix, ifindex); + print_end_section(); + } + diff --git a/patches.suse/bpftool-Refactor-misc.-feature-probe.patch b/patches.suse/bpftool-Refactor-misc.-feature-probe.patch new file mode 100644 index 0000000..67f4e3e --- /dev/null +++ b/patches.suse/bpftool-Refactor-misc.-feature-probe.patch @@ -0,0 +1,96 @@ +From: Paul Chaignon +Date: Tue, 4 Jan 2022 18:59:29 +0100 +Subject: bpftool: Refactor misc. feature probe +Patch-mainline: v5.17-rc1 +Git-commit: b22bf1b9979a608827dea98c61ed9ec297bcc513 +References: jsc#PED-1368 + +There is currently a single miscellaneous feature probe, +HAVE_LARGE_INSN_LIMIT, to check for the 1M instructions limit in the +verifier. Subsequent patches will add additional miscellaneous probes, +which follow the same pattern at the existing probe. This patch +therefore refactors the probe to avoid code duplication in subsequent +patches. + +The BPF program type and the checked error numbers in the +HAVE_LARGE_INSN_LIMIT probe are changed to better generalize to other +probes. The feature probe retains its current behavior despite those +changes. 
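As the bounded-loops and ISA-extension patches above show, each new probe then reduces to building a small instruction array and one call to the shared helper, e.g.:

    probe_misc_feature(insns, ARRAY_SIZE(insns),
                       define_prefix, ifindex,
                       "have_bounded_loops",
                       "Bounded loop support",
                       "BOUNDED_LOOPS");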
+ +Signed-off-by: Paul Chaignon +Signed-off-by: Daniel Borkmann +Reviewed-by: Quentin Monnet +Link: https://lore.kernel.org/bpf/956c9329a932c75941194f91790d01f31dfbe01b.1641314075.git.paul@isovalent.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/feature.c | 45 ++++++++++++++++++++++++++++---------------- + 1 file changed, 29 insertions(+), 16 deletions(-) + +--- a/tools/bpf/bpftool/feature.c ++++ b/tools/bpf/bpftool/feature.c +@@ -642,6 +642,30 @@ probe_helpers_for_progtype(enum bpf_prog + printf("\n"); + } + ++static void ++probe_misc_feature(struct bpf_insn *insns, size_t len, ++ const char *define_prefix, __u32 ifindex, ++ const char *feat_name, const char *plain_name, ++ const char *define_name) ++{ ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, ++ .prog_ifindex = ifindex, ++ ); ++ bool res; ++ int fd; ++ ++ errno = 0; ++ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", ++ insns, len, &opts); ++ res = fd >= 0 || !errno; ++ ++ if (fd >= 0) ++ close(fd); ++ ++ print_bool_feature(feat_name, plain_name, define_name, res, ++ define_prefix); ++} ++ + /* + * Probe for availability of kernel commit (5.3): + * +@@ -649,29 +673,18 @@ probe_helpers_for_progtype(enum bpf_prog + */ + static void probe_large_insn_limit(const char *define_prefix, __u32 ifindex) + { +- LIBBPF_OPTS(bpf_prog_load_opts, opts, +- .prog_ifindex = ifindex, +- ); + struct bpf_insn insns[BPF_MAXINSNS + 1]; +- bool res; +- int i, fd; ++ int i; + + for (i = 0; i < BPF_MAXINSNS; i++) + insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1); + insns[BPF_MAXINSNS] = BPF_EXIT_INSN(); + +- errno = 0; +- fd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, NULL, "GPL", +- insns, ARRAY_SIZE(insns), &opts); +- res = fd >= 0 || (errno != E2BIG && errno != EINVAL); +- +- if (fd >= 0) +- close(fd); +- +- print_bool_feature("have_large_insn_limit", ++ probe_misc_feature(insns, ARRAY_SIZE(insns), ++ define_prefix, ifindex, ++ "have_large_insn_limit", + "Large program size limit", +- "LARGE_INSN_LIMIT", +- res, define_prefix); ++ "LARGE_INSN_LIMIT"); + } + + static void diff --git a/patches.suse/bpftool-Reimplement-large-insn-size-limit-feature-pr.patch b/patches.suse/bpftool-Reimplement-large-insn-size-limit-feature-pr.patch new file mode 100644 index 0000000..511984e --- /dev/null +++ b/patches.suse/bpftool-Reimplement-large-insn-size-limit-feature-pr.patch @@ -0,0 +1,57 @@ +From: Andrii Nakryiko +Date: Fri, 17 Dec 2021 09:12:02 -0800 +Subject: bpftool: Reimplement large insn size limit feature probing +Patch-mainline: v5.17-rc1 +Git-commit: e967a20a8fabc6442a78e2e2059e63a4bb6aed08 +References: jsc#PED-1368 + +Reimplement bpf_probe_large_insn_limit() in bpftool, as that libbpf API +is scheduled for deprecation in v0.8. 
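The heart of the reimplementation is the success heuristic in the hunk below: a program of BPF_MAXINSNS + 1 instructions counts as supported when it loads, or when it fails for any reason other than the two errors pre-5.3 kernels return for programs over the old 4096-instruction cap:

    res = fd >= 0 || (errno != E2BIG && errno != EINVAL);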
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Acked-by: Dave Marchevsky +Link: https://lore.kernel.org/bpf/20211217171202.3352835-4-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/feature.c | 26 +++++++++++++++++++++++--- + 1 file changed, 23 insertions(+), 3 deletions(-) + +--- a/tools/bpf/bpftool/feature.c ++++ b/tools/bpf/bpftool/feature.c +@@ -642,12 +642,32 @@ probe_helpers_for_progtype(enum bpf_prog + printf("\n"); + } + +-static void +-probe_large_insn_limit(const char *define_prefix, __u32 ifindex) ++/* ++ * Probe for availability of kernel commit (5.3): ++ * ++ * c04c0d2b968a ("bpf: increase complexity limit and maximum program size") ++ */ ++static void probe_large_insn_limit(const char *define_prefix, __u32 ifindex) + { ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, ++ .prog_ifindex = ifindex, ++ ); ++ struct bpf_insn insns[BPF_MAXINSNS + 1]; + bool res; ++ int i, fd; ++ ++ for (i = 0; i < BPF_MAXINSNS; i++) ++ insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1); ++ insns[BPF_MAXINSNS] = BPF_EXIT_INSN(); ++ ++ errno = 0; ++ fd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, NULL, "GPL", ++ insns, ARRAY_SIZE(insns), &opts); ++ res = fd >= 0 || (errno != E2BIG && errno != EINVAL); ++ ++ if (fd >= 0) ++ close(fd); + +- res = bpf_probe_large_insn_limit(ifindex); + print_bool_feature("have_large_insn_limit", + "Large program size limit", + "LARGE_INSN_LIMIT", diff --git a/patches.suse/bpftool-Remove-inclusion-of-utilities.mak-from-Makef.patch b/patches.suse/bpftool-Remove-inclusion-of-utilities.mak-from-Makef.patch index a91754a..18c48e7 100644 --- a/patches.suse/bpftool-Remove-inclusion-of-utilities.mak-from-Makef.patch +++ b/patches.suse/bpftool-Remove-inclusion-of-utilities.mak-from-Makef.patch @@ -22,7 +22,7 @@ Acked-by: Shung-Hsi Yu --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -1,6 +1,5 @@ - # SPDX-License-Identifier: GPL-2.0-only + # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) include ../../../scripts/Makefile.include -include ../../../scripts/utilities.mak @@ -31,7 +31,7 @@ Acked-by: Shung-Hsi Yu --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -1,6 +1,5 @@ - # SPDX-License-Identifier: GPL-2.0-only + # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) include ../../scripts/Makefile.include -include ../../scripts/utilities.mak diff --git a/patches.suse/bpftool-Stop-using-deprecated-bpf_load_program.patch b/patches.suse/bpftool-Stop-using-deprecated-bpf_load_program.patch new file mode 100644 index 0000000..7cc03cf --- /dev/null +++ b/patches.suse/bpftool-Stop-using-deprecated-bpf_load_program.patch @@ -0,0 +1,28 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 15:08:39 -0700 +Subject: bpftool: Stop using deprecated bpf_load_program() +Patch-mainline: v5.17-rc1 +Git-commit: a3c7c7e8050fc299b42fa3d89bac253a8dfa5c0c +References: jsc#PED-1368 + +Switch to bpf_prog_load() instead. 
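As a standalone sketch of the replacement API, loading a trivial "return 0" socket filter (the program name and type are illustrative; the raw instruction encodings come straight from the UAPI header):

    #include <bpf/bpf.h>

    int load_example_prog(void)
    {
            struct bpf_insn insns[] = {
                    /* r0 = 0 */
                    { .code = BPF_ALU64 | BPF_MOV | BPF_K,
                      .dst_reg = BPF_REG_0, .imm = 0 },
                    /* exit */
                    { .code = BPF_JMP | BPF_EXIT },
            };
            LIBBPF_OPTS(bpf_prog_load_opts, opts);

            return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "example_prog",
                                 "GPL", insns, sizeof(insns) / sizeof(insns[0]),
                                 &opts);
    }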
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211103220845.2676888-7-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/feature.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/bpf/bpftool/feature.c ++++ b/tools/bpf/bpftool/feature.c +@@ -467,7 +467,7 @@ static bool probe_bpf_syscall(const char + { + bool res; + +- bpf_load_program(BPF_PROG_TYPE_UNSPEC, NULL, 0, NULL, 0, NULL, 0); ++ bpf_prog_load(BPF_PROG_TYPE_UNSPEC, NULL, NULL, NULL, 0, NULL); + res = (errno != ENOSYS); + + print_bool_feature("have_bpf_syscall", diff --git a/patches.suse/bpftool-Support-BTF_KIND_TYPE_TAG.patch b/patches.suse/bpftool-Support-BTF_KIND_TYPE_TAG.patch new file mode 100644 index 0000000..0381375 --- /dev/null +++ b/patches.suse/bpftool-Support-BTF_KIND_TYPE_TAG.patch @@ -0,0 +1,36 @@ +From: Yonghong Song +Date: Thu, 11 Nov 2021 17:26:20 -0800 +Subject: bpftool: Support BTF_KIND_TYPE_TAG +Patch-mainline: v5.17-rc1 +Git-commit: 3da5ba6f0509ace03cad38b554c89797129e90be +References: jsc#PED-1368 + +Add bpftool support for BTF_KIND_TYPE_TAG. + +Signed-off-by: Yonghong Song +Signed-off-by: Alexei Starovoitov +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211112012620.1505506-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/btf.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/tools/bpf/bpftool/btf.c ++++ b/tools/bpf/bpftool/btf.c +@@ -39,6 +39,7 @@ static const char * const btf_kind_str[N + [BTF_KIND_DATASEC] = "DATASEC", + [BTF_KIND_FLOAT] = "FLOAT", + [BTF_KIND_DECL_TAG] = "DECL_TAG", ++ [BTF_KIND_TYPE_TAG] = "TYPE_TAG", + }; + + struct btf_attach_point { +@@ -142,6 +143,7 @@ static int dump_btf_type(const struct bt + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: ++ case BTF_KIND_TYPE_TAG: + if (json_output) + jsonw_uint_field(w, "type_id", t->type); + else diff --git a/patches.suse/bpftool-Switch-bpf_object__load_xattr-to-bpf_object_.patch b/patches.suse/bpftool-Switch-bpf_object__load_xattr-to-bpf_object_.patch new file mode 100644 index 0000000..890c57b --- /dev/null +++ b/patches.suse/bpftool-Switch-bpf_object__load_xattr-to-bpf_object_.patch @@ -0,0 +1,159 @@ +From: Andrii Nakryiko +Date: Thu, 9 Dec 2021 11:38:40 -0800 +Subject: bpftool: Switch bpf_object__load_xattr() to bpf_object__load() +Patch-mainline: v5.17-rc1 +Git-commit: b59e4ce8bcaab6445f4a0d37a96ca8953caaf5cf +References: jsc#PED-1368 + +Switch all the uses of to-be-deprecated bpf_object__load_xattr() into +a simple bpf_object__load() calls with optional log_level passed through +open_opts.kernel_log_level, if -d option is specified. 
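Condensed, the pattern applied at each call site is (the object path is illustrative; verifier_logs reflects bpftool's -d flag):

    LIBBPF_OPTS(bpf_object_open_opts, open_opts);
    struct bpf_object *obj;

    if (verifier_logs)
            /* log_level1 + log_level2 + stats, but not stable UAPI */
            open_opts.kernel_log_level = 1 + 2 + 4;

    obj = bpf_object__open_file("prog.bpf.o", &open_opts);
    if (libbpf_get_error(obj))
            return -1;
    if (bpf_object__load(obj)) {
            bpf_object__close(obj);
            return -1;
    }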
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211209193840.1248570-13-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/gen.c | 11 ++++------- + tools/bpf/bpftool/prog.c | 24 ++++++++++-------------- + tools/bpf/bpftool/struct_ops.c | 15 +++++++-------- + 3 files changed, 21 insertions(+), 29 deletions(-) + +--- a/tools/bpf/bpftool/gen.c ++++ b/tools/bpf/bpftool/gen.c +@@ -486,7 +486,6 @@ static void codegen_destroy(struct bpf_o + + static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard) + { +- struct bpf_object_load_attr load_attr = {}; + DECLARE_LIBBPF_OPTS(gen_loader_opts, opts); + struct bpf_map *map; + char ident[256]; +@@ -496,12 +495,7 @@ static int gen_trace(struct bpf_object * + if (err) + return err; + +- load_attr.obj = obj; +- if (verifier_logs) +- /* log_level1 + log_level2 + stats, but not stable UAPI */ +- load_attr.log_level = 1 + 2 + 4; +- +- err = bpf_object__load_xattr(&load_attr); ++ err = bpf_object__load(obj); + if (err) { + p_err("failed to load object file"); + goto out; +@@ -719,6 +713,9 @@ static int do_skeleton(int argc, char ** + if (obj_name[0] == '\0') + get_obj_name(obj_name, file); + opts.object_name = obj_name; ++ if (verifier_logs) ++ /* log_level1 + log_level2 + stats, but not stable UAPI */ ++ opts.kernel_log_level = 1 + 2 + 4; + obj = bpf_object__open_mem(obj_data, file_sz, &opts); + err = libbpf_get_error(obj); + if (err) { +--- a/tools/bpf/bpftool/prog.c ++++ b/tools/bpf/bpftool/prog.c +@@ -1464,7 +1464,6 @@ static int load_with_options(int argc, c + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts, + .relaxed_maps = relaxed_maps, + ); +- struct bpf_object_load_attr load_attr = { 0 }; + enum bpf_attach_type expected_attach_type; + struct map_replace *map_replace = NULL; + struct bpf_program *prog = NULL, *pos; +@@ -1598,6 +1597,10 @@ static int load_with_options(int argc, c + + set_max_rlimit(); + ++ if (verifier_logs) ++ /* log_level1 + log_level2 + stats, but not stable UAPI */ ++ open_opts.kernel_log_level = 1 + 2 + 4; ++ + obj = bpf_object__open_file(file, &open_opts); + if (libbpf_get_error(obj)) { + p_err("failed to open object file"); +@@ -1677,12 +1680,7 @@ static int load_with_options(int argc, c + goto err_close_obj; + } + +- load_attr.obj = obj; +- if (verifier_logs) +- /* log_level1 + log_level2 + stats, but not stable UAPI */ +- load_attr.log_level = 1 + 2 + 4; +- +- err = bpf_object__load_xattr(&load_attr); ++ err = bpf_object__load(obj); + if (err) { + p_err("failed to load object file"); + goto err_close_obj; +@@ -1809,7 +1807,6 @@ static int do_loader(int argc, char **ar + { + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts); + DECLARE_LIBBPF_OPTS(gen_loader_opts, gen); +- struct bpf_object_load_attr load_attr = {}; + struct bpf_object *obj; + const char *file; + int err = 0; +@@ -1818,6 +1815,10 @@ static int do_loader(int argc, char **ar + return -1; + file = GET_ARG(); + ++ if (verifier_logs) ++ /* log_level1 + log_level2 + stats, but not stable UAPI */ ++ open_opts.kernel_log_level = 1 + 2 + 4; ++ + obj = bpf_object__open_file(file, &open_opts); + if (libbpf_get_error(obj)) { + p_err("failed to open object file"); +@@ -1828,12 +1829,7 @@ static int do_loader(int argc, char **ar + if (err) + goto err_close_obj; + +- load_attr.obj = obj; +- if (verifier_logs) +- /* log_level1 + log_level2 + stats, but not stable UAPI */ +- load_attr.log_level = 1 + 2 + 4; +- +- err = bpf_object__load_xattr(&load_attr); ++ err = 
bpf_object__load(obj); + if (err) { + p_err("failed to load object file"); + goto err_close_obj; +--- a/tools/bpf/bpftool/struct_ops.c ++++ b/tools/bpf/bpftool/struct_ops.c +@@ -479,7 +479,7 @@ static int do_unregister(int argc, char + + static int do_register(int argc, char **argv) + { +- struct bpf_object_load_attr load_attr = {}; ++ LIBBPF_OPTS(bpf_object_open_opts, open_opts); + const struct bpf_map_def *def; + struct bpf_map_info info = {}; + __u32 info_len = sizeof(info); +@@ -494,18 +494,17 @@ static int do_register(int argc, char ** + + file = GET_ARG(); + +- obj = bpf_object__open(file); ++ if (verifier_logs) ++ /* log_level1 + log_level2 + stats, but not stable UAPI */ ++ open_opts.kernel_log_level = 1 + 2 + 4; ++ ++ obj = bpf_object__open_file(file, &open_opts); + if (libbpf_get_error(obj)) + return -1; + + set_max_rlimit(); + +- load_attr.obj = obj; +- if (verifier_logs) +- /* log_level1 + log_level2 + stats, but not stable UAPI */ +- load_attr.log_level = 1 + 2 + 4; +- +- if (bpf_object__load_xattr(&load_attr)) { ++ if (bpf_object__load(obj)) { + bpf_object__close(obj); + return -1; + } diff --git a/patches.suse/bpftool-Update-btf_dump__new-and-perf_buffer__new_ra.patch b/patches.suse/bpftool-Update-btf_dump__new-and-perf_buffer__new_ra.patch new file mode 100644 index 0000000..346b2bf --- /dev/null +++ b/patches.suse/bpftool-Update-btf_dump__new-and-perf_buffer__new_ra.patch @@ -0,0 +1,71 @@ +From: Andrii Nakryiko +Date: Wed, 10 Nov 2021 21:36:24 -0800 +Subject: bpftool: Update btf_dump__new() and perf_buffer__new_raw() calls +Patch-mainline: v5.17-rc1 +Git-commit: 164b04f27fbd769f57905dfddd2a8953974eeef4 +References: jsc#PED-1368 + +Use v1.0-compatible variants of btf_dump and perf_buffer "constructors". +This is also a demonstration of reusing struct perf_buffer_raw_opts as +OPTS-style option struct for new perf_buffer__new_raw() API. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211111053624.190580-10-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/btf.c | 2 +- + tools/bpf/bpftool/gen.c | 2 +- + tools/bpf/bpftool/map_perf_ring.c | 9 +++------ + 3 files changed, 5 insertions(+), 8 deletions(-) + +--- a/tools/bpf/bpftool/btf.c ++++ b/tools/bpf/bpftool/btf.c +@@ -418,7 +418,7 @@ static int dump_btf_c(const struct btf * + struct btf_dump *d; + int err = 0, i; + +- d = btf_dump__new(btf, NULL, NULL, btf_dump_printf); ++ d = btf_dump__new(btf, btf_dump_printf, NULL, NULL); + if (IS_ERR(d)) + return PTR_ERR(d); + +--- a/tools/bpf/bpftool/gen.c ++++ b/tools/bpf/bpftool/gen.c +@@ -218,7 +218,7 @@ static int codegen_datasecs(struct bpf_o + char sec_ident[256], map_ident[256]; + int i, err = 0; + +- d = btf_dump__new(btf, NULL, NULL, codegen_btf_dump_printf); ++ d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL); + if (IS_ERR(d)) + return PTR_ERR(d); + +--- a/tools/bpf/bpftool/map_perf_ring.c ++++ b/tools/bpf/bpftool/map_perf_ring.c +@@ -124,7 +124,7 @@ int do_event_pipe(int argc, char **argv) + .wakeup_events = 1, + }; + struct bpf_map_info map_info = {}; +- struct perf_buffer_raw_opts opts = {}; ++ LIBBPF_OPTS(perf_buffer_raw_opts, opts); + struct event_pipe_ctx ctx = { + .all_cpus = true, + .cpu = -1, +@@ -190,14 +190,11 @@ int do_event_pipe(int argc, char **argv) + ctx.idx = 0; + } + +- opts.attr = &perf_attr; +- opts.event_cb = print_bpf_output; +- opts.ctx = &ctx; + opts.cpu_cnt = ctx.all_cpus ? 
0 : 1; + opts.cpus = &ctx.cpu; + opts.map_keys = &ctx.idx; +- +- pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &opts); ++ pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &perf_attr, ++ print_bpf_output, &ctx, &opts); + err = libbpf_get_error(pb); + if (err) { + p_err("failed to create perf buffer: %s (%d)", diff --git a/patches.suse/bpftool-Update-doc-use-susbtitutions-and-test_bpftoo.patch b/patches.suse/bpftool-Update-doc-use-susbtitutions-and-test_bpftoo.patch new file mode 100644 index 0000000..a6dd029 --- /dev/null +++ b/patches.suse/bpftool-Update-doc-use-susbtitutions-and-test_bpftoo.patch @@ -0,0 +1,385 @@ +From: Quentin Monnet +Date: Mon, 15 Nov 2021 22:58:43 +0000 +Subject: bpftool: Update doc (use susbtitutions) and test_bpftool_synctypes.py +Patch-mainline: v5.17-rc1 +Git-commit: b623181520404ef48f7421333561bd294c6c6b11 +References: jsc#PED-1368 + +test_bpftool_synctypes.py helps detecting inconsistencies in bpftool +between the different list of types and options scattered in the +sources, the documentation, and the bash completion. For options that +apply to all bpftool commands, the script had a hardcoded list of +values, and would use them to check whether the man pages are +up-to-date. When writing the script, it felt acceptable to have this +list in order to avoid to open and parse bpftool's main.h every time, +and because the list of global options in bpftool doesn't change so +often. + +However, this is prone to omissions, and we recently added a new +-l|--legacy option which was described in common_options.rst, but not +listed in the options summary of each manual page. The script did not +complain, because it keeps comparing the hardcoded list to the (now) +outdated list in the header file. + +To address the issue, this commit brings the following changes: + +- Options that are common to all bpftool commands (--json, --pretty, and + --debug) are moved to a dedicated file, and used in the definition of + a RST substitution. This substitution is used in the sources of all + the man pages. + +- This list of common options is updated, with the addition of the new + -l|--legacy option. + +- The script test_bpftool_synctypes.py is updated to compare: + - Options specific to a command, found in C files, for the + interactive help messages, with the same specific options from the + relevant man page for that command. + - Common options, checked just once: the list in main.h is + compared with the new list in substitutions.rst. 
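The mechanism itself is compact; simplified from the hunks below, the shared definition and its use in each page look like:

    .. in Documentation/substitutions.rst, defined once:
    .. |COMMON_OPTIONS| replace:: { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | { **-l** | **--legacy** }

    .. in every manual page:
    .. include:: substitutions.rst

    *OPTIONS* := { |COMMON_OPTIONS| }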
+ +Signed-off-by: Quentin Monnet +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211115225844.33943-3-quentin@isovalent.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/Documentation/bpftool-btf.rst | 5 - + tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 5 - + tools/bpf/bpftool/Documentation/bpftool-feature.rst | 4 + tools/bpf/bpftool/Documentation/bpftool-gen.rst | 5 - + tools/bpf/bpftool/Documentation/bpftool-iter.rst | 4 + tools/bpf/bpftool/Documentation/bpftool-link.rst | 5 - + tools/bpf/bpftool/Documentation/bpftool-map.rst | 5 - + tools/bpf/bpftool/Documentation/bpftool-net.rst | 4 + tools/bpf/bpftool/Documentation/bpftool-perf.rst | 4 + tools/bpf/bpftool/Documentation/bpftool-prog.rst | 4 + tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst | 4 + tools/bpf/bpftool/Documentation/bpftool.rst | 5 - + tools/bpf/bpftool/Documentation/substitutions.rst | 3 + tools/testing/selftests/bpf/test_bpftool_synctypes.py | 70 +++++++++++++++-- + 14 files changed, 102 insertions(+), 25 deletions(-) + create mode 100644 tools/bpf/bpftool/Documentation/substitutions.rst + +--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst +@@ -9,13 +9,14 @@ tool for inspection of BTF data + + :Manual section: 8 + ++.. include:: substitutions.rst ++ + SYNOPSIS + ======== + + **bpftool** [*OPTIONS*] **btf** *COMMAND* + +- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | {**-d** | **--debug** } | +- { **-B** | **--base-btf** } } ++ *OPTIONS* := { |COMMON_OPTIONS| | { **-B** | **--base-btf** } } + + *COMMANDS* := { **dump** | **help** } + +--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +@@ -9,13 +9,14 @@ tool for inspection and simple manipulat + + :Manual section: 8 + ++.. include:: substitutions.rst ++ + SYNOPSIS + ======== + + **bpftool** [*OPTIONS*] **cgroup** *COMMAND* + +- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | +- { **-f** | **--bpffs** } } ++ *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } } + + *COMMANDS* := + { **show** | **list** | **tree** | **attach** | **detach** | **help** } +--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst +@@ -9,12 +9,14 @@ tool for inspection of eBPF-related para + + :Manual section: 8 + ++.. include:: substitutions.rst ++ + SYNOPSIS + ======== + + **bpftool** [*OPTIONS*] **feature** *COMMAND* + +- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } ++ *OPTIONS* := { |COMMON_OPTIONS| } + + *COMMANDS* := { **probe** | **help** } + +--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst +@@ -9,13 +9,14 @@ tool for BPF code-generation + + :Manual section: 8 + ++.. include:: substitutions.rst ++ + SYNOPSIS + ======== + + **bpftool** [*OPTIONS*] **gen** *COMMAND* + +- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | +- { **-L** | **--use-loader** } } ++ *OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } } + + *COMMAND* := { **object** | **skeleton** | **help** } + +--- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst +@@ -9,12 +9,14 @@ tool to create BPF iterators + + :Manual section: 8 + ++.. 
include:: substitutions.rst ++ + SYNOPSIS + ======== + + **bpftool** [*OPTIONS*] **iter** *COMMAND* + +- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } ++ *OPTIONS* := { |COMMON_OPTIONS| } + + *COMMANDS* := { **pin** | **help** } + +--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst +@@ -9,13 +9,14 @@ tool for inspection and simple manipulat + + :Manual section: 8 + ++.. include:: substitutions.rst ++ + SYNOPSIS + ======== + + **bpftool** [*OPTIONS*] **link** *COMMAND* + +- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | +- { **-f** | **--bpffs** } | { **-n** | **--nomount** } } ++ *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } | { **-n** | **--nomount** } } + + *COMMANDS* := { **show** | **list** | **pin** | **help** } + +--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst +@@ -9,13 +9,14 @@ tool for inspection and simple manipulat + + :Manual section: 8 + ++.. include:: substitutions.rst ++ + SYNOPSIS + ======== + + **bpftool** [*OPTIONS*] **map** *COMMAND* + +- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | +- { **-f** | **--bpffs** } | { **-n** | **--nomount** } } ++ *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } | { **-n** | **--nomount** } } + + *COMMANDS* := + { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | +--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst +@@ -9,12 +9,14 @@ tool for inspection of netdev/tc related + + :Manual section: 8 + ++.. include:: substitutions.rst ++ + SYNOPSIS + ======== + + **bpftool** [*OPTIONS*] **net** *COMMAND* + +- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } ++ *OPTIONS* := { |COMMON_OPTIONS| } + + *COMMANDS* := + { **show** | **list** | **attach** | **detach** | **help** } +--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst +@@ -9,12 +9,14 @@ tool for inspection of perf related bpf + + :Manual section: 8 + ++.. include:: substitutions.rst ++ + SYNOPSIS + ======== + + **bpftool** [*OPTIONS*] **perf** *COMMAND* + +- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } ++ *OPTIONS* := { |COMMON_OPTIONS| } + + *COMMANDS* := + { **show** | **list** | **help** } +--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst +@@ -9,12 +9,14 @@ tool for inspection and simple manipulat + + :Manual section: 8 + ++.. include:: substitutions.rst ++ + SYNOPSIS + ======== + + **bpftool** [*OPTIONS*] **prog** *COMMAND* + +- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | ++ *OPTIONS* := { |COMMON_OPTIONS| | + { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } | + { **-L** | **--use-loader** } } + +--- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst +@@ -9,12 +9,14 @@ tool to register/unregister/introspect B + + :Manual section: 8 + ++.. 
include:: substitutions.rst ++ + SYNOPSIS + ======== + + **bpftool** [*OPTIONS*] **struct_ops** *COMMAND* + +- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } ++ *OPTIONS* := { |COMMON_OPTIONS| } + + *COMMANDS* := + { **show** | **list** | **dump** | **register** | **unregister** | **help** } +--- a/tools/bpf/bpftool/Documentation/bpftool.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool.rst +@@ -9,6 +9,8 @@ tool for inspection and simple manipulat + + :Manual section: 8 + ++.. include:: substitutions.rst ++ + SYNOPSIS + ======== + +@@ -20,8 +22,7 @@ SYNOPSIS + + *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** } + +- *OPTIONS* := { { **-V** | **--version** } | +- { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } ++ *OPTIONS* := { { **-V** | **--version** } | |COMMON_OPTIONS| } + + *MAP-COMMANDS* := + { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | +--- /dev/null ++++ b/tools/bpf/bpftool/Documentation/substitutions.rst +@@ -0,0 +1,3 @@ ++.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++ ++.. |COMMON_OPTIONS| replace:: { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | { **-l** | **--legacy** } +--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py ++++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py +@@ -242,12 +242,6 @@ class FileExtractor(object): + end_marker = re.compile('}\\\\n') + return self.__get_description_list(start_marker, pattern, end_marker) + +- def default_options(self): +- """ +- Return the default options contained in HELP_SPEC_OPTIONS +- """ +- return { '-j', '--json', '-p', '--pretty', '-d', '--debug' } +- + def get_bashcomp_list(self, block_name): + """ + Search for and parse a list of type names from a variable in bash +@@ -274,7 +268,56 @@ class SourceFileExtractor(FileExtractor) + defined in children classes. + """ + def get_options(self): +- return self.default_options().union(self.get_help_list_macro('HELP_SPEC_OPTIONS')) ++ return self.get_help_list_macro('HELP_SPEC_OPTIONS') ++ ++class MainHeaderFileExtractor(SourceFileExtractor): ++ """ ++ An extractor for bpftool's main.h ++ """ ++ filename = os.path.join(BPFTOOL_DIR, 'main.h') ++ ++ def get_common_options(self): ++ """ ++ Parse the list of common options in main.h (options that apply to all ++ commands), which looks to the lists of options in other source files ++ but has different start and end markers: ++ ++ "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}" ++ ++ Return a set containing all options, such as: ++ ++ {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'} ++ """ ++ start_marker = re.compile(f'"OPTIONS :=') ++ pattern = re.compile('([\w-]+) ?(?:\||}[ }\]"])') ++ end_marker = re.compile('#define') ++ ++ parser = InlineListParser(self.reader) ++ parser.search_block(start_marker) ++ return parser.parse(pattern, end_marker) ++ ++class ManSubstitutionsExtractor(SourceFileExtractor): ++ """ ++ An extractor for substitutions.rst ++ """ ++ filename = os.path.join(BPFTOOL_DIR, 'Documentation/substitutions.rst') ++ ++ def get_common_options(self): ++ """ ++ Parse the list of common options in substitutions.rst (options that ++ apply to all commands). 
++ ++ Return a set containing all options, such as: ++ ++ {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'} ++ """ ++ start_marker = re.compile('\|COMMON_OPTIONS\| replace:: {') ++ pattern = re.compile('\*\*([\w/-]+)\*\*') ++ end_marker = re.compile('}$') ++ ++ parser = InlineListParser(self.reader) ++ parser.search_block(start_marker) ++ return parser.parse(pattern, end_marker) + + class ProgFileExtractor(SourceFileExtractor): + """ +@@ -580,6 +623,19 @@ def main(): + verify(help_main_options, man_main_options, + f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):') + ++ # Compare common options (options that apply to all commands) ++ ++ main_hdr_info = MainHeaderFileExtractor() ++ source_common_options = main_hdr_info.get_common_options() ++ main_hdr_info.close() ++ ++ man_substitutions = ManSubstitutionsExtractor() ++ man_common_options = man_substitutions.get_common_options() ++ man_substitutions.close() ++ ++ verify(source_common_options, man_common_options, ++ f'Comparing common options from {main_hdr_info.filename} (HELP_SPEC_OPTIONS) and {man_substitutions.filename}:') ++ + sys.exit(retval) + + if __name__ == "__main__": diff --git a/patches.suse/bpftool-Update-the-lists-of-names-for-maps-and-prog-.patch b/patches.suse/bpftool-Update-the-lists-of-names-for-maps-and-prog-.patch new file mode 100644 index 0000000..770eaf5 --- /dev/null +++ b/patches.suse/bpftool-Update-the-lists-of-names-for-maps-and-prog-.patch @@ -0,0 +1,77 @@ +From: Quentin Monnet +Date: Wed, 10 Nov 2021 11:46:31 +0000 +Subject: bpftool: Update the lists of names for maps and prog-attach types +Patch-mainline: v5.17-rc1 +Git-commit: 3811e2753a39efb8aa5b8c133dc24f6d26f6cd96 +References: jsc#PED-1368 + +To support the different BPF map or attach types, bpftool must remain +up-to-date with the types supported by the kernel. Let's update the +lists, by adding the missing Bloom filter map type and the perf_event +attach type. + +Both missing items were found with test_bpftool_synctypes.py. 
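With the lists updated, the new map type is accepted on the command line as well; an illustrative invocation (not from the patch — note the kernel requires a zero key size for bloom filter maps):

    # bpftool map create /sys/fs/bpf/bloom_example \
          type bloom_filter key 0 value 8 entries 128 name bloom_example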
+ +Signed-off-by: Quentin Monnet +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211110114632.24537-6-quentin@isovalent.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/Documentation/bpftool-map.rst | 2 +- + tools/bpf/bpftool/bash-completion/bpftool | 3 ++- + tools/bpf/bpftool/common.c | 1 + + tools/bpf/bpftool/map.c | 3 ++- + 4 files changed, 6 insertions(+), 3 deletions(-) + +--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst ++++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst +@@ -52,7 +52,7 @@ MAP COMMANDS + | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** + | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** + | | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** +- | **task_storage** } ++| | **task_storage** | **bloom_filter** } + + DESCRIPTION + =========== +--- a/tools/bpf/bpftool/bash-completion/bpftool ++++ b/tools/bpf/bpftool/bash-completion/bpftool +@@ -710,7 +710,8 @@ _bpftool() + hash_of_maps devmap devmap_hash sockmap cpumap \ + xskmap sockhash cgroup_storage reuseport_sockarray \ + percpu_cgroup_storage queue stack sk_storage \ +- struct_ops inode_storage task_storage ringbuf' ++ struct_ops ringbuf inode_storage task_storage \ ++ bloom_filter' + COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) ) + return 0 + ;; +--- a/tools/bpf/bpftool/common.c ++++ b/tools/bpf/bpftool/common.c +@@ -74,6 +74,7 @@ const char * const attach_type_name[__MA + [BPF_XDP] = "xdp", + [BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select", + [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate", ++ [BPF_PERF_EVENT] = "perf_event", + }; + + void p_err(const char *fmt, ...) +--- a/tools/bpf/bpftool/map.c ++++ b/tools/bpf/bpftool/map.c +@@ -53,6 +53,7 @@ const char * const map_type_name[] = { + [BPF_MAP_TYPE_RINGBUF] = "ringbuf", + [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", + [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", ++ [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", + }; + + const size_t map_type_name_size = ARRAY_SIZE(map_type_name); +@@ -1477,7 +1478,7 @@ static int do_help(int argc, char **argv + " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" + " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" + " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" +- " task_storage }\n" ++ " task_storage | bloom_filter }\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-f|--bpffs} | {-n|--nomount} }\n" + "", diff --git a/patches.suse/bpftool-Use-bpf_obj_get_info_by_fd-directly.patch b/patches.suse/bpftool-Use-bpf_obj_get_info_by_fd-directly.patch new file mode 100644 index 0000000..83d336d --- /dev/null +++ b/patches.suse/bpftool-Use-bpf_obj_get_info_by_fd-directly.patch @@ -0,0 +1,342 @@ +From: Dave Marchevsky +Date: Mon, 1 Nov 2021 15:43:55 -0700 +Subject: bpftool: Use bpf_obj_get_info_by_fd directly +Patch-mainline: v5.17-rc1 +Git-commit: c59765cfd193382b00454b1a4424cb78d4c065e2 +References: jsc#PED-1368 + +To prepare for impending deprecation of libbpf's +bpf_program__get_prog_info_linear, migrate uses of this function to use +bpf_obj_get_info_by_fd. + +Since the profile_target_name and dump_prog_id_as_func_ptr helpers were +only looking at the first func_info, avoid grabbing the rest to save a +malloc. For do_dump, add a more full-featured helper, but avoid +free/realloc of buffer when possible for multi-prog dumps. 
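The underlying idiom is one call to learn the counts, then a second call with pointers aimed at caller-owned storage. A sketch with error handling trimmed (ptr_to_u64() is bpftool's cast helper):

    struct bpf_prog_info info = {};
    __u32 info_len = sizeof(info);
    struct bpf_func_info finfo;
    __u32 rec_size;

    /* First call: the kernel fills in counts and record sizes only. */
    if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len))
            return -1;
    rec_size = info.func_info_rec_size;

    /* Second call: request just the first func_info record. */
    memset(&info, 0, sizeof(info));
    info.nr_func_info = 1;
    info.func_info_rec_size = rec_size;
    info.func_info = ptr_to_u64(&finfo);
    if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len))
            return -1;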
+ +Signed-off-by: Dave Marchevsky +Signed-off-by: Andrii Nakryiko +Acked-by: Quentin Monnet +Link: https://lore.kernel.org/bpf/20211101224357.2651181-3-davemarchevsky@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/btf_dumper.c | 40 ++++++---- + tools/bpf/bpftool/prog.c | 159 ++++++++++++++++++++++++++++++++--------- + 2 files changed, 149 insertions(+), 50 deletions(-) + +--- a/tools/bpf/bpftool/btf_dumper.c ++++ b/tools/bpf/bpftool/btf_dumper.c +@@ -32,14 +32,16 @@ static int dump_prog_id_as_func_ptr(cons + const struct btf_type *func_proto, + __u32 prog_id) + { +- struct bpf_prog_info_linear *prog_info = NULL; + const struct btf_type *func_type; ++ int prog_fd = -1, func_sig_len; ++ struct bpf_prog_info info = {}; ++ __u32 info_len = sizeof(info); + const char *prog_name = NULL; +- struct bpf_func_info *finfo; + struct btf *prog_btf = NULL; +- struct bpf_prog_info *info; +- int prog_fd, func_sig_len; ++ struct bpf_func_info finfo; ++ __u32 finfo_rec_size; + char prog_str[1024]; ++ int err; + + /* Get the ptr's func_proto */ + func_sig_len = btf_dump_func(d->btf, prog_str, func_proto, NULL, 0, +@@ -55,22 +57,27 @@ static int dump_prog_id_as_func_ptr(cons + if (prog_fd < 0) + goto print; + +- prog_info = bpf_program__get_prog_info_linear(prog_fd, +- 1UL << BPF_PROG_INFO_FUNC_INFO); +- close(prog_fd); +- if (IS_ERR(prog_info)) { +- prog_info = NULL; ++ err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); ++ if (err) + goto print; +- } +- info = &prog_info->info; + +- if (!info->btf_id || !info->nr_func_info) ++ if (!info.btf_id || !info.nr_func_info) + goto print; +- prog_btf = btf__load_from_kernel_by_id(info->btf_id); ++ ++ finfo_rec_size = info.func_info_rec_size; ++ memset(&info, 0, sizeof(info)); ++ info.nr_func_info = 1; ++ info.func_info_rec_size = finfo_rec_size; ++ info.func_info = ptr_to_u64(&finfo); ++ ++ err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); ++ if (err) ++ goto print; ++ ++ prog_btf = btf__load_from_kernel_by_id(info.btf_id); + if (libbpf_get_error(prog_btf)) + goto print; +- finfo = u64_to_ptr(info->func_info); +- func_type = btf__type_by_id(prog_btf, finfo->type_id); ++ func_type = btf__type_by_id(prog_btf, finfo.type_id); + if (!func_type || !btf_is_func(func_type)) + goto print; + +@@ -92,7 +99,8 @@ print: + prog_str[sizeof(prog_str) - 1] = '\0'; + jsonw_string(d->jw, prog_str); + btf__free(prog_btf); +- free(prog_info); ++ if (prog_fd >= 0) ++ close(prog_fd); + return 0; + } + +--- a/tools/bpf/bpftool/prog.c ++++ b/tools/bpf/bpftool/prog.c +@@ -100,6 +100,76 @@ static enum bpf_attach_type parse_attach + return __MAX_BPF_ATTACH_TYPE; + } + ++static int prep_prog_info(struct bpf_prog_info *const info, enum dump_mode mode, ++ void **info_data, size_t *const info_data_sz) ++{ ++ struct bpf_prog_info holder = {}; ++ size_t needed = 0; ++ void *ptr; ++ ++ if (mode == DUMP_JITED) { ++ holder.jited_prog_len = info->jited_prog_len; ++ needed += info->jited_prog_len; ++ } else { ++ holder.xlated_prog_len = info->xlated_prog_len; ++ needed += info->xlated_prog_len; ++ } ++ ++ holder.nr_jited_ksyms = info->nr_jited_ksyms; ++ needed += info->nr_jited_ksyms * sizeof(__u64); ++ ++ holder.nr_jited_func_lens = info->nr_jited_func_lens; ++ needed += info->nr_jited_func_lens * sizeof(__u32); ++ ++ holder.nr_func_info = info->nr_func_info; ++ holder.func_info_rec_size = info->func_info_rec_size; ++ needed += info->nr_func_info * info->func_info_rec_size; ++ ++ holder.nr_line_info = info->nr_line_info; ++ holder.line_info_rec_size = info->line_info_rec_size; ++ 
needed += info->nr_line_info * info->line_info_rec_size; ++ ++ holder.nr_jited_line_info = info->nr_jited_line_info; ++ holder.jited_line_info_rec_size = info->jited_line_info_rec_size; ++ needed += info->nr_jited_line_info * info->jited_line_info_rec_size; ++ ++ if (needed > *info_data_sz) { ++ ptr = realloc(*info_data, needed); ++ if (!ptr) ++ return -1; ++ ++ *info_data = ptr; ++ *info_data_sz = needed; ++ } ++ ptr = *info_data; ++ ++ if (mode == DUMP_JITED) { ++ holder.jited_prog_insns = ptr_to_u64(ptr); ++ ptr += holder.jited_prog_len; ++ } else { ++ holder.xlated_prog_insns = ptr_to_u64(ptr); ++ ptr += holder.xlated_prog_len; ++ } ++ ++ holder.jited_ksyms = ptr_to_u64(ptr); ++ ptr += holder.nr_jited_ksyms * sizeof(__u64); ++ ++ holder.jited_func_lens = ptr_to_u64(ptr); ++ ptr += holder.nr_jited_func_lens * sizeof(__u32); ++ ++ holder.func_info = ptr_to_u64(ptr); ++ ptr += holder.nr_func_info * holder.func_info_rec_size; ++ ++ holder.line_info = ptr_to_u64(ptr); ++ ptr += holder.nr_line_info * holder.line_info_rec_size; ++ ++ holder.jited_line_info = ptr_to_u64(ptr); ++ ptr += holder.nr_jited_line_info * holder.jited_line_info_rec_size; ++ ++ *info = holder; ++ return 0; ++} ++ + static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) + { + struct timespec real_time_ts, boot_time_ts; +@@ -803,16 +873,18 @@ prog_dump(struct bpf_prog_info *info, en + + static int do_dump(int argc, char **argv) + { +- struct bpf_prog_info_linear *info_linear; ++ struct bpf_prog_info info; ++ __u32 info_len = sizeof(info); ++ size_t info_data_sz = 0; ++ void *info_data = NULL; + char *filepath = NULL; + bool opcodes = false; + bool visual = false; + enum dump_mode mode; + bool linum = false; +- int *fds = NULL; + int nb_fds, i = 0; ++ int *fds = NULL; + int err = -1; +- __u64 arrays; + + if (is_prefix(*argv, "jited")) { + if (disasm_init()) +@@ -872,43 +944,44 @@ static int do_dump(int argc, char **argv + goto exit_close; + } + +- if (mode == DUMP_JITED) +- arrays = 1UL << BPF_PROG_INFO_JITED_INSNS; +- else +- arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS; +- +- arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS; +- arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; +- arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; +- arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; +- arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; +- + if (json_output && nb_fds > 1) + jsonw_start_array(json_wtr); /* root array */ + for (i = 0; i < nb_fds; i++) { +- info_linear = bpf_program__get_prog_info_linear(fds[i], arrays); +- if (IS_ERR_OR_NULL(info_linear)) { ++ memset(&info, 0, sizeof(info)); ++ ++ err = bpf_obj_get_info_by_fd(fds[i], &info, &info_len); ++ if (err) { ++ p_err("can't get prog info: %s", strerror(errno)); ++ break; ++ } ++ ++ err = prep_prog_info(&info, mode, &info_data, &info_data_sz); ++ if (err) { ++ p_err("can't grow prog info_data"); ++ break; ++ } ++ ++ err = bpf_obj_get_info_by_fd(fds[i], &info, &info_len); ++ if (err) { + p_err("can't get prog info: %s", strerror(errno)); + break; + } + + if (json_output && nb_fds > 1) { + jsonw_start_object(json_wtr); /* prog object */ +- print_prog_header_json(&info_linear->info); ++ print_prog_header_json(&info); + jsonw_name(json_wtr, "insns"); + } else if (nb_fds > 1) { +- print_prog_header_plain(&info_linear->info); ++ print_prog_header_plain(&info); + } + +- err = prog_dump(&info_linear->info, mode, filepath, opcodes, +- visual, linum); ++ err = prog_dump(&info, mode, filepath, opcodes, visual, linum); + + if (json_output && nb_fds > 1) + jsonw_end_object(json_wtr); /* prog 
object */ + else if (i != nb_fds - 1 && nb_fds > 1) + printf("\n"); + +- free(info_linear); + if (err) + break; + close(fds[i]); +@@ -920,6 +993,7 @@ exit_close: + for (; i < nb_fds; i++) + close(fds[i]); + exit_free: ++ free(info_data); + free(fds); + return err; + } +@@ -2016,41 +2090,58 @@ static void profile_print_readings(void) + + static char *profile_target_name(int tgt_fd) + { +- struct bpf_prog_info_linear *info_linear; +- struct bpf_func_info *func_info; ++ struct bpf_func_info func_info; ++ struct bpf_prog_info info = {}; ++ __u32 info_len = sizeof(info); + const struct btf_type *t; ++ __u32 func_info_rec_size; + struct btf *btf = NULL; + char *name = NULL; ++ int err; + +- info_linear = bpf_program__get_prog_info_linear( +- tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); +- if (IS_ERR_OR_NULL(info_linear)) { +- p_err("failed to get info_linear for prog FD %d", tgt_fd); +- return NULL; ++ err = bpf_obj_get_info_by_fd(tgt_fd, &info, &info_len); ++ if (err) { ++ p_err("failed to bpf_obj_get_info_by_fd for prog FD %d", tgt_fd); ++ goto out; + } + +- if (info_linear->info.btf_id == 0) { ++ if (info.btf_id == 0) { + p_err("prog FD %d doesn't have valid btf", tgt_fd); + goto out; + } + +- btf = btf__load_from_kernel_by_id(info_linear->info.btf_id); ++ func_info_rec_size = info.func_info_rec_size; ++ if (info.nr_func_info == 0) { ++ p_err("bpf_obj_get_info_by_fd for prog FD %d found 0 func_info", tgt_fd); ++ goto out; ++ } ++ ++ memset(&info, 0, sizeof(info)); ++ info.nr_func_info = 1; ++ info.func_info_rec_size = func_info_rec_size; ++ info.func_info = ptr_to_u64(&func_info); ++ ++ err = bpf_obj_get_info_by_fd(tgt_fd, &info, &info_len); ++ if (err) { ++ p_err("failed to get func_info for prog FD %d", tgt_fd); ++ goto out; ++ } ++ ++ btf = btf__load_from_kernel_by_id(info.btf_id); + if (libbpf_get_error(btf)) { + p_err("failed to load btf for prog FD %d", tgt_fd); + goto out; + } + +- func_info = u64_to_ptr(info_linear->info.func_info); +- t = btf__type_by_id(btf, func_info[0].type_id); ++ t = btf__type_by_id(btf, func_info.type_id); + if (!t) { + p_err("btf %d doesn't have type %d", +- info_linear->info.btf_id, func_info[0].type_id); ++ info.btf_id, func_info.type_id); + goto out; + } + name = strdup(btf__name_by_offset(btf, t->name_off)); + out: + btf__free(btf); +- free(info_linear); + return name; + } + diff --git a/patches.suse/bpftool-Use-libbpf_get_error-to-check-error.patch b/patches.suse/bpftool-Use-libbpf_get_error-to-check-error.patch new file mode 100644 index 0000000..b6efe99 --- /dev/null +++ b/patches.suse/bpftool-Use-libbpf_get_error-to-check-error.patch @@ -0,0 +1,204 @@ +From: Hengqi Chen +Date: Mon, 15 Nov 2021 09:24:36 +0800 +Subject: bpftool: Use libbpf_get_error() to check error +Patch-mainline: v5.17-rc1 +Git-commit: e5043894b21f7d99d3db31ad06308d6c5726caa6 +References: jsc#PED-1368 + +Currently, LIBBPF_STRICT_ALL mode is enabled by default for +bpftool which means on error cases, some libbpf APIs would +return NULL pointers. This makes IS_ERR check failed to detect +such cases and result in segfault error. Use libbpf_get_error() +instead like we do in libbpf itself. 
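Condensed from the btf.c hunk below, the class of bug and its fix:

    d = btf_dump__new(btf, btf_dump_printf, NULL, NULL);

    /* Before: IS_ERR() misses the NULL that strict-mode APIs return. */
    if (IS_ERR(d))
            return PTR_ERR(d);

    /* After: libbpf_get_error() handles both error encodings. */
    err = libbpf_get_error(d);
    if (err)
            return err;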
+ +Signed-off-by: Hengqi Chen +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211115012436.3143318-1-hengqi.chen@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/btf.c | 9 +++++---- + tools/bpf/bpftool/gen.c | 10 ++++++---- + tools/bpf/bpftool/iter.c | 7 ++++--- + tools/bpf/bpftool/map.c | 10 +++++----- + tools/bpf/bpftool/struct_ops.c | 14 +++++++------- + 5 files changed, 27 insertions(+), 23 deletions(-) + +--- a/tools/bpf/bpftool/btf.c ++++ b/tools/bpf/bpftool/btf.c +@@ -421,8 +421,9 @@ static int dump_btf_c(const struct btf * + int err = 0, i; + + d = btf_dump__new(btf, btf_dump_printf, NULL, NULL); +- if (IS_ERR(d)) +- return PTR_ERR(d); ++ err = libbpf_get_error(d); ++ if (err) ++ return err; + + printf("#ifndef __VMLINUX_H__\n"); + printf("#define __VMLINUX_H__\n"); +@@ -549,8 +550,8 @@ static int do_dump(int argc, char **argv + } + + btf = btf__parse_split(*argv, base ?: base_btf); +- if (IS_ERR(btf)) { +- err = -PTR_ERR(btf); ++ err = libbpf_get_error(btf); ++ if (err) { + btf = NULL; + p_err("failed to load BTF from %s: %s", + *argv, strerror(err)); +--- a/tools/bpf/bpftool/gen.c ++++ b/tools/bpf/bpftool/gen.c +@@ -219,8 +219,9 @@ static int codegen_datasecs(struct bpf_o + int i, err = 0; + + d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL); +- if (IS_ERR(d)) +- return PTR_ERR(d); ++ err = libbpf_get_error(d); ++ if (err) ++ return err; + + bpf_object__for_each_map(map, obj) { + /* only generate definitions for memory-mapped internal maps */ +@@ -719,10 +720,11 @@ static int do_skeleton(int argc, char ** + get_obj_name(obj_name, file); + opts.object_name = obj_name; + obj = bpf_object__open_mem(obj_data, file_sz, &opts); +- if (IS_ERR(obj)) { ++ err = libbpf_get_error(obj); ++ if (err) { + char err_buf[256]; + +- libbpf_strerror(PTR_ERR(obj), err_buf, sizeof(err_buf)); ++ libbpf_strerror(err, err_buf, sizeof(err_buf)); + p_err("failed to open BPF object file: %s", err_buf); + obj = NULL; + goto out; +--- a/tools/bpf/bpftool/iter.c ++++ b/tools/bpf/bpftool/iter.c +@@ -46,7 +46,8 @@ static int do_pin(int argc, char **argv) + } + + obj = bpf_object__open(objfile); +- if (IS_ERR(obj)) { ++ err = libbpf_get_error(obj); ++ if (err) { + p_err("can't open objfile %s", objfile); + goto close_map_fd; + } +@@ -64,8 +65,8 @@ static int do_pin(int argc, char **argv) + } + + link = bpf_program__attach_iter(prog, &iter_opts); +- if (IS_ERR(link)) { +- err = PTR_ERR(link); ++ err = libbpf_get_error(link); ++ if (err) { + p_err("attach_iter failed for program %s", + bpf_program__name(prog)); + goto close_obj; +--- a/tools/bpf/bpftool/map.c ++++ b/tools/bpf/bpftool/map.c +@@ -812,7 +812,7 @@ static struct btf *get_map_kv_btf(const + if (info->btf_vmlinux_value_type_id) { + if (!btf_vmlinux) { + btf_vmlinux = libbpf_find_kernel_btf(); +- if (IS_ERR(btf_vmlinux)) ++ if (libbpf_get_error(btf_vmlinux)) + p_err("failed to get kernel btf"); + } + return btf_vmlinux; +@@ -832,13 +832,13 @@ static struct btf *get_map_kv_btf(const + + static void free_map_kv_btf(struct btf *btf) + { +- if (!IS_ERR(btf) && btf != btf_vmlinux) ++ if (!libbpf_get_error(btf) && btf != btf_vmlinux) + btf__free(btf); + } + + static void free_btf_vmlinux(void) + { +- if (!IS_ERR(btf_vmlinux)) ++ if (!libbpf_get_error(btf_vmlinux)) + btf__free(btf_vmlinux); + } + +@@ -863,8 +863,8 @@ map_dump(int fd, struct bpf_map_info *in + + if (wtr) { + btf = get_map_kv_btf(info); +- if (IS_ERR(btf)) { +- err = PTR_ERR(btf); ++ err = libbpf_get_error(btf); ++ if (err) { + goto exit_free; + } + +--- 
a/tools/bpf/bpftool/struct_ops.c ++++ b/tools/bpf/bpftool/struct_ops.c +@@ -32,7 +32,7 @@ static const struct btf *get_btf_vmlinux + return btf_vmlinux; + + btf_vmlinux = libbpf_find_kernel_btf(); +- if (IS_ERR(btf_vmlinux)) ++ if (libbpf_get_error(btf_vmlinux)) + p_err("struct_ops requires kernel CONFIG_DEBUG_INFO_BTF=y"); + + return btf_vmlinux; +@@ -45,7 +45,7 @@ static const char *get_kern_struct_ops_n + const char *st_ops_name; + + kern_btf = get_btf_vmlinux(); +- if (IS_ERR(kern_btf)) ++ if (libbpf_get_error(kern_btf)) + return ""; + + t = btf__type_by_id(kern_btf, info->btf_vmlinux_value_type_id); +@@ -63,7 +63,7 @@ static __s32 get_map_info_type_id(void) + return map_info_type_id; + + kern_btf = get_btf_vmlinux(); +- if (IS_ERR(kern_btf)) { ++ if (libbpf_get_error(kern_btf)) { + map_info_type_id = PTR_ERR(kern_btf); + return map_info_type_id; + } +@@ -415,7 +415,7 @@ static int do_dump(int argc, char **argv + } + + kern_btf = get_btf_vmlinux(); +- if (IS_ERR(kern_btf)) ++ if (libbpf_get_error(kern_btf)) + return -1; + + if (!json_output) { +@@ -495,7 +495,7 @@ static int do_register(int argc, char ** + file = GET_ARG(); + + obj = bpf_object__open(file); +- if (IS_ERR_OR_NULL(obj)) ++ if (libbpf_get_error(obj)) + return -1; + + set_max_rlimit(); +@@ -516,7 +516,7 @@ static int do_register(int argc, char ** + continue; + + link = bpf_map__attach_struct_ops(map); +- if (IS_ERR(link)) { ++ if (libbpf_get_error(link)) { + p_err("can't register struct_ops %s: %s", + bpf_map__name(map), + strerror(-PTR_ERR(link))); +@@ -596,7 +596,7 @@ int do_struct_ops(int argc, char **argv) + + err = cmd_select(cmds, argc, argv, do_help); + +- if (!IS_ERR(btf_vmlinux)) ++ if (!libbpf_get_error(btf_vmlinux)) + btf__free(btf_vmlinux); + + return err; diff --git a/patches.suse/bsg-lib-initialize-the-bsg_job-in-bsg_transport_sg_i.patch b/patches.suse/bsg-lib-initialize-the-bsg_job-in-bsg_transport_sg_i.patch new file mode 100644 index 0000000..b355ee0 --- /dev/null +++ b/patches.suse/bsg-lib-initialize-the-bsg_job-in-bsg_transport_sg_i.patch @@ -0,0 +1,109 @@ +From: Christoph Hellwig +Date: Thu, 21 Oct 2021 08:06:04 +0200 +Subject: [PATCH] bsg-lib: initialize the bsg_job in bsg_transport_sg_io_fn +Git-commit: 237ea1602fb4cd14cd31b745a56fd0639c58eea3 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Directly initialize the bsg_job structure instead of relying on the +->.initialize_rq_fn indirection. This also removes the superflous +initialization of the second request used for BIDI requests. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Hannes Reinecke +Link: https://lore.kernel.org/r/20211021060607.264371-5-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/bsg-lib.c | 32 +++++++++++++------------------- + 1 file changed, 13 insertions(+), 19 deletions(-) + +diff --git a/block/bsg-lib.c b/block/bsg-lib.c +index ccb98276c964..10aa378702fa 100644 +--- a/block/bsg-lib.c ++++ b/block/bsg-lib.c +@@ -31,6 +31,7 @@ static int bsg_transport_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr, + struct bsg_job *job; + struct request *rq; + struct bio *bio; ++ void *reply; + int ret; + + if (hdr->protocol != BSG_PROTOCOL_SCSI || +@@ -39,22 +40,28 @@ static int bsg_transport_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr, + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + +- rq = blk_get_request(q, hdr->dout_xfer_len ? ++ rq = blk_mq_alloc_request(q, hdr->dout_xfer_len ? 
+ REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); + if (IS_ERR(rq)) + return PTR_ERR(rq); + rq->timeout = timeout; + + job = blk_mq_rq_to_pdu(rq); ++ reply = job->reply; ++ memset(job, 0, sizeof(*job)); ++ job->reply = reply; ++ job->reply_len = SCSI_SENSE_BUFFERSIZE; ++ job->dd_data = job + 1; ++ + job->request_len = hdr->request_len; + job->request = memdup_user(uptr64(hdr->request), hdr->request_len); + if (IS_ERR(job->request)) { + ret = PTR_ERR(job->request); +- goto out_put_request; ++ goto out_free_rq; + } + + if (hdr->dout_xfer_len && hdr->din_xfer_len) { +- job->bidi_rq = blk_get_request(rq->q, REQ_OP_DRV_IN, 0); ++ job->bidi_rq = blk_mq_alloc_request(rq->q, REQ_OP_DRV_IN, 0); + if (IS_ERR(job->bidi_rq)) { + ret = PTR_ERR(job->bidi_rq); + goto out_free_job_request; +@@ -134,11 +141,11 @@ static int bsg_transport_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr, + blk_rq_unmap_user(job->bidi_bio); + out_free_bidi_rq: + if (job->bidi_rq) +- blk_put_request(job->bidi_rq); ++ blk_mq_free_request(job->bidi_rq); + out_free_job_request: + kfree(job->request); +-out_put_request: +- blk_put_request(rq); ++out_free_rq: ++ blk_mq_free_request(rq); + return ret; + } + +@@ -302,18 +309,6 @@ static int bsg_init_rq(struct blk_mq_tag_set *set, struct request *req, + return 0; + } + +-/* called right before the request is given to the request_queue user */ +-static void bsg_initialize_rq(struct request *req) +-{ +- struct bsg_job *job = blk_mq_rq_to_pdu(req); +- void *reply = job->reply; +- +- memset(job, 0, sizeof(*job)); +- job->reply = reply; +- job->reply_len = SCSI_SENSE_BUFFERSIZE; +- job->dd_data = job + 1; +-} +- + static void bsg_exit_rq(struct blk_mq_tag_set *set, struct request *req, + unsigned int hctx_idx) + { +@@ -350,7 +345,6 @@ static const struct blk_mq_ops bsg_mq_ops = { + .queue_rq = bsg_queue_rq, + .init_request = bsg_init_rq, + .exit_request = bsg_exit_rq, +- .initialize_rq_fn = bsg_initialize_rq, + .complete = bsg_complete, + .timeout = bsg_timeout, + }; +-- +2.35.3 + diff --git a/patches.suse/btrfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch b/patches.suse/btrfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch new file mode 100644 index 0000000..da738f3 --- /dev/null +++ b/patches.suse/btrfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch @@ -0,0 +1,109 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:12 +0200 +Subject: [PATCH] btrfs: use bdev_nr_bytes instead of open coding it +Git-commit: cda00eba022d6a0a60740989ac79fc6a258b2d7a +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. 
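
Reduced to a sketch, the conversion pattern applied throughout this series looks as follows; bdev_nr_bytes() and bdev_nr_sectors() are the real block layer helpers, while the demo_* wrappers exist only for illustration:

	#include <linux/blkdev.h>

	/* Illustration only: the helpers replace the open-coded
	 * i_size_read(bdev->bd_inode) idiom. */
	static inline loff_t demo_bdev_bytes(struct block_device *bdev)
	{
		return bdev_nr_bytes(bdev);	/* was: i_size_read(bdev->bd_inode) */
	}

	static inline sector_t demo_bdev_sectors(struct block_device *bdev)
	{
		/* was: i_size_read(bdev->bd_inode) >> SECTOR_SHIFT */
		return bdev_nr_sectors(bdev);
	}
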
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Reviewed-by: Anand Jain +Reviewed-by: Chaitanya Kulkarni +Acked-by: David Sterba +Link: https://lore.kernel.org/r/20211018101130.1838532-13-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/btrfs/dev-replace.c | 3 +-- + fs/btrfs/disk-io.c | 2 +- + fs/btrfs/ioctl.c | 4 ++-- + fs/btrfs/volumes.c | 8 ++++---- + 4 files changed, 8 insertions(+), 9 deletions(-) + +diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c +index d029be40ea6f..fbb8b4457a72 100644 +--- a/fs/btrfs/dev-replace.c ++++ b/fs/btrfs/dev-replace.c +@@ -283,8 +283,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, + } + + +- if (i_size_read(bdev->bd_inode) < +- btrfs_device_get_total_bytes(srcdev)) { ++ if (bdev_nr_bytes(bdev) < btrfs_device_get_total_bytes(srcdev)) { + btrfs_err(fs_info, + "target device is smaller than source device!"); + ret = -EINVAL; +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index 355ea88d5c5f..29e7598584c4 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -3740,7 +3740,7 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, + else if (ret) + return ERR_PTR(ret); + +- if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode)) ++ if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev)) + return ERR_PTR(-EINVAL); + + page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS); +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index cc61813213d8..36ff713da1b1 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -1730,7 +1730,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, + } + + if (!strcmp(sizestr, "max")) +- new_size = device->bdev->bd_inode->i_size; ++ new_size = bdev_nr_bytes(device->bdev); + else { + if (sizestr[0] == '-') { + mod = -1; +@@ -1771,7 +1771,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, + ret = -EINVAL; + goto out_finish; + } +- if (new_size > device->bdev->bd_inode->i_size) { ++ if (new_size > bdev_nr_bytes(device->bdev)) { + ret = -EFBIG; + goto out_finish; + } +diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c +index 2ec3b8ac8fa3..676c7c4e6e59 100644 +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -1286,7 +1286,7 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev + pgoff_t index; + + /* make sure our super fits in the device */ +- if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode)) ++ if (bytenr + PAGE_SIZE >= bdev_nr_bytes(bdev)) + return ERR_PTR(-EINVAL); + + /* make sure our super fits in the page */ +@@ -2610,8 +2610,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path + device->io_width = fs_info->sectorsize; + device->io_align = fs_info->sectorsize; + device->sector_size = fs_info->sectorsize; +- device->total_bytes = round_down(i_size_read(bdev->bd_inode), +- fs_info->sectorsize); ++ device->total_bytes = ++ round_down(bdev_nr_bytes(bdev), fs_info->sectorsize); + device->disk_total_bytes = device->total_bytes; + device->commit_total_bytes = device->total_bytes; + set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); +@@ -7236,7 +7236,7 @@ static int read_one_dev(struct extent_buffer *leaf, + + fill_device_from_item(leaf, dev_item, device); + if (device->bdev) { +- u64 max_total_bytes = i_size_read(device->bdev->bd_inode); ++ u64 max_total_bytes = bdev_nr_bytes(device->bdev); + + if (device->total_bytes > max_total_bytes) { + btrfs_err(fs_info, +-- +2.35.3 + diff --git 
a/patches.suse/cdrom-Remove-redundant-variable-and-its-assignment.patch b/patches.suse/cdrom-Remove-redundant-variable-and-its-assignment.patch new file mode 100644 index 0000000..d169728 --- /dev/null +++ b/patches.suse/cdrom-Remove-redundant-variable-and-its-assignment.patch @@ -0,0 +1,54 @@ +From: luo penghao +Date: Thu, 21 Oct 2021 08:46:21 +0100 +Subject: [PATCH] cdrom: Remove redundant variable and its assignment +Git-commit: bbc3925cf696422492ebdaba386e61450fa2294c +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Variable is not used in functions, and its assignment is redundant too. +So it should be deleted. Also the inner-most set of parentheses is no +longer needed. + +The clang_analyzer complains as follows: + +drivers/cdrom/cdrom.c:877: warning: + +Although the value stored to 'ret' is used in the enclosing expression, +the value is never actually read from 'ret'. + +Reported-by: Zeal Robot +Signed-off-by: luo penghao +Link: https://lore.kernel.org/all/20211020024229.1036219-1-luo.penghao@zte.com.cn +Signed-off-by: Phillip Potter +Link: https://lore.kernel.org/r/20211021074621.901-1-phil@philpotter.co.uk +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/cdrom/cdrom.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c +index 89a68457820a..9877e413fce3 100644 +--- a/drivers/cdrom/cdrom.c ++++ b/drivers/cdrom/cdrom.c +@@ -871,7 +871,7 @@ static void cdrom_mmc3_profile(struct cdrom_device_info *cdi) + { + struct packet_command cgc; + char buffer[32]; +- int ret, mmc3_profile; ++ int mmc3_profile; + + init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ); + +@@ -881,7 +881,7 @@ static void cdrom_mmc3_profile(struct cdrom_device_info *cdi) + cgc.cmd[8] = sizeof(buffer); /* Allocation Length */ + cgc.quiet = 1; + +- if ((ret = cdi->ops->generic_packet(cdi, &cgc))) ++ if (cdi->ops->generic_packet(cdi, &cgc)) + mmc3_profile = 0xffff; + else + mmc3_profile = (buffer[6] << 8) | buffer[7]; +-- +2.35.3 + diff --git a/patches.suse/cdrom-gdrom-add-error-handling-support-for-add_disk.patch b/patches.suse/cdrom-gdrom-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..7dc2b27 --- /dev/null +++ b/patches.suse/cdrom-gdrom-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,41 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:02:27 -0700 +Subject: [PATCH] cdrom/gdrom: add error handling support for add_disk() +Git-commit: d6ac27c60fec4dc59473e39abf924e430a9ea320 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. 
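
The probe-time idiom this establishes, sketched with a hypothetical unwind label (the concrete gdrom conversion is in the hunk below):

	err = add_disk(disk);		/* returns int as of this series */
	if (err)
		goto fail_add_disk;	/* hypothetical: undo earlier setup */
	return 0;

	fail_add_disk:
		undo_earlier_setup();	/* hypothetical helper */
		return err;
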
+ +Signed-off-by: Luis Chamberlain +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/cdrom/gdrom.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c +index 8e1fe75af93f..d50cc1fd34d5 100644 +--- a/drivers/cdrom/gdrom.c ++++ b/drivers/cdrom/gdrom.c +@@ -805,9 +805,14 @@ static int probe_gdrom(struct platform_device *devptr) + err = -ENOMEM; + goto probe_fail_free_irqs; + } +- add_disk(gd.disk); ++ err = add_disk(gd.disk); ++ if (err) ++ goto probe_fail_add_disk; ++ + return 0; + ++probe_fail_add_disk: ++ kfree(gd.toc); + probe_fail_free_irqs: + free_irq(HW_EVENT_GDROM_DMA, &gd); + free_irq(HW_EVENT_GDROM_CMD, &gd); +-- +2.35.3 + diff --git a/patches.suse/cramfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch b/patches.suse/cramfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch new file mode 100644 index 0000000..1c9fe1a --- /dev/null +++ b/patches.suse/cramfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch @@ -0,0 +1,34 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:13 +0200 +Subject: [PATCH] cramfs: use bdev_nr_bytes instead of open coding it +Git-commit: 5816e91e4a14955224ae600dfea460d22588230a +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Link: https://lore.kernel.org/r/20211018101130.1838532-14-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/cramfs/inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c +index 2be65269a987..666aa380011e 100644 +--- a/fs/cramfs/inode.c ++++ b/fs/cramfs/inode.c +@@ -209,7 +209,7 @@ static void *cramfs_blkdev_read(struct super_block *sb, unsigned int offset, + return read_buffers[i] + blk_offset; + } + +- devsize = mapping->host->i_size >> PAGE_SHIFT; ++ devsize = bdev_nr_bytes(sb->s_bdev) >> PAGE_SHIFT; + + /* Ok, read in BLKS_PER_BUF pages completely first. */ + for (i = 0; i < BLKS_PER_BUF; i++) { +-- +2.35.3 + diff --git a/patches.suse/direct-io-remove-blk_poll-support.patch b/patches.suse/direct-io-remove-blk_poll-support.patch new file mode 100644 index 0000000..e2e7016 --- /dev/null +++ b/patches.suse/direct-io-remove-blk_poll-support.patch @@ -0,0 +1,72 @@ +From: Christoph Hellwig +Date: Tue, 12 Oct 2021 13:12:11 +0200 +Subject: [PATCH] direct-io: remove blk_poll support +Git-commit: 94c2ed58d0d856a35c04365bdb39fee6e77547de +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +The polling support in the legacy direct-io support is a little crufty. +It already doesn't support the asynchronous polling needed for io_uring +polling, and is hard to adopt to upcoming changes in the polling +interfaces. Given that all the major file systems already use the iomap +direct I/O code, just drop the polling support. 
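
Condensed from the dio_await_one() hunk below, the remaining wait logic is an unconditional sleep; the IOCB_HIPRI/blk_poll() fast path is gone:

	__set_current_state(TASK_UNINTERRUPTIBLE);
	dio->waiter = current;
	spin_unlock_irqrestore(&dio->bio_lock, flags);
	blk_io_schedule();		/* was: blk_poll() when IOCB_HIPRI */
	spin_lock_irqsave(&dio->bio_lock, flags);
	dio->waiter = NULL;
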
+ +Signed-off-by: Christoph Hellwig +Tested-by: Mark Wunderlich +Link: https://lore.kernel.org/r/20211012111226.760968-2-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/direct-io.c | 14 ++++---------- + 1 file changed, 4 insertions(+), 10 deletions(-) + +diff --git a/fs/direct-io.c b/fs/direct-io.c +index b2e86e739d7a..453dcff0e7f5 100644 +--- a/fs/direct-io.c ++++ b/fs/direct-io.c +@@ -119,7 +119,6 @@ struct dio { + int flags; /* doesn't change */ + int op; + int op_flags; +- blk_qc_t bio_cookie; + struct gendisk *bio_disk; + struct inode *inode; + loff_t i_size; /* i_size when submitted */ +@@ -438,11 +437,10 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) + + dio->bio_disk = bio->bi_bdev->bd_disk; + +- if (sdio->submit_io) { ++ if (sdio->submit_io) + sdio->submit_io(bio, dio->inode, sdio->logical_offset_in_bio); +- dio->bio_cookie = BLK_QC_T_NONE; +- } else +- dio->bio_cookie = submit_bio(bio); ++ else ++ submit_bio(bio); + + sdio->bio = NULL; + sdio->boundary = 0; +@@ -481,9 +479,7 @@ static struct bio *dio_await_one(struct dio *dio) + __set_current_state(TASK_UNINTERRUPTIBLE); + dio->waiter = current; + spin_unlock_irqrestore(&dio->bio_lock, flags); +- if (!(dio->iocb->ki_flags & IOCB_HIPRI) || +- !blk_poll(dio->bio_disk->queue, dio->bio_cookie, true)) +- blk_io_schedule(); ++ blk_io_schedule(); + /* wake up sets us TASK_RUNNING */ + spin_lock_irqsave(&dio->bio_lock, flags); + dio->waiter = NULL; +@@ -1214,8 +1210,6 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, + } else { + dio->op = REQ_OP_READ; + } +- if (iocb->ki_flags & IOCB_HIPRI) +- dio->op_flags |= REQ_HIPRI; + + /* + * For AIO O_(D)SYNC writes we need to defer completions to a workqueue +-- +2.35.3 + diff --git a/patches.suse/dm-add-add_disk-error-handling.patch b/patches.suse/dm-add-add_disk-error-handling.patch new file mode 100644 index 0000000..be6de52 --- /dev/null +++ b/patches.suse/dm-add-add_disk-error-handling.patch @@ -0,0 +1,45 @@ +From: Luis Chamberlain +Date: Fri, 15 Oct 2021 16:30:22 -0700 +Subject: [PATCH] dm: add add_disk() error handling +Git-commit: e7089f65dd51afeda5eb760506b5950d95f9ec29 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. + +There are two calls to dm_setup_md_queue() which can fail then, +one on dm_early_create() and we can easily see that the error path +there calls dm_destroy in the error path. The other use case is on +the ioctl table_load case. If that fails userspace needs to call +the DM_DEV_REMOVE_CMD to cleanup the state - similar to any other +failure. 
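
The caller-side contract, restated as a sketch (the err_cleanup label is hypothetical; the paragraph above names the real unwind paths):

	r = dm_setup_md_queue(md, t);
	if (r) {
		/* dm_early_create() unwinds via dm_destroy(); after a failed
		 * table_load ioctl, userspace recovers with DM_DEV_REMOVE_CMD. */
		goto err_cleanup;
	}
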
+ +Reviewed-by: Hannes Reinecke +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20211015233028.2167651-4-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/md/dm.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/md/dm.c b/drivers/md/dm.c +index 7870e6460633..79d4ac4aab05 100644 +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -2078,7 +2078,9 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) + if (r) + return r; + +- add_disk(md->disk); ++ r = add_disk(md->disk); ++ if (r) ++ return r; + + r = dm_sysfs_init(md); + if (r) { +-- +2.35.3 + diff --git a/patches.suse/dm-use-bdev_nr_sectors-and-bdev_nr_bytes-instead-of-.patch b/patches.suse/dm-use-bdev_nr_sectors-and-bdev_nr_bytes-instead-of-.patch new file mode 100644 index 0000000..0a34fa7 --- /dev/null +++ b/patches.suse/dm-use-bdev_nr_sectors-and-bdev_nr_bytes-instead-of-.patch @@ -0,0 +1,385 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:05 +0200 +Subject: [PATCH] dm: use bdev_nr_sectors and bdev_nr_bytes instead of open + coding them +Git-commit: 6dcbb52cddd9e50c8f6625b02a31f6dffc0d1a7b +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helpers to read the block device size. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Acked-by: Mike Snitzer +Link: https://lore.kernel.org/r/20211018101130.1838532-6-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/md/dm-bufio.c | 2 +- + drivers/md/dm-cache-metadata.c | 2 +- + drivers/md/dm-cache-target.c | 2 +- + drivers/md/dm-clone-target.c | 2 +- + drivers/md/dm-dust.c | 5 ++--- + drivers/md/dm-ebs-target.c | 2 +- + drivers/md/dm-era-target.c | 2 +- + drivers/md/dm-exception-store.h | 2 +- + drivers/md/dm-flakey.c | 3 +-- + drivers/md/dm-integrity.c | 6 +++--- + drivers/md/dm-linear.c | 3 +-- + drivers/md/dm-log-writes.c | 4 ++-- + drivers/md/dm-log.c | 2 +- + drivers/md/dm-mpath.c | 2 +- + drivers/md/dm-raid.c | 6 +++--- + drivers/md/dm-switch.c | 2 +- + drivers/md/dm-table.c | 3 +-- + drivers/md/dm-thin-metadata.c | 2 +- + drivers/md/dm-thin.c | 2 +- + drivers/md/dm-verity-target.c | 3 +-- + drivers/md/dm-writecache.c | 2 +- + drivers/md/dm-zoned-target.c | 2 +- + 22 files changed, 28 insertions(+), 33 deletions(-) + +diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c +index 50f3e673729c..104ebc1f08dc 100644 +--- a/drivers/md/dm-bufio.c ++++ b/drivers/md/dm-bufio.c +@@ -1525,7 +1525,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_size); + + sector_t dm_bufio_get_device_size(struct dm_bufio_client *c) + { +- sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT; ++ sector_t s = bdev_nr_sectors(c->bdev); + if (s >= c->start) + s -= c->start; + else +diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c +index 89a73204dbf4..2874f222c313 100644 +--- a/drivers/md/dm-cache-metadata.c ++++ b/drivers/md/dm-cache-metadata.c +@@ -334,7 +334,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) + int r; + struct dm_block *sblock; + struct cache_disk_superblock *disk_super; +- sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT; ++ sector_t bdev_size = bdev_nr_sectors(cmd->bdev); + + /* FIXME: see if we can lose the max sectors limit */ + if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS) +diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c +index bdd500447dea..447d030036d1 100644 +--- a/drivers/md/dm-cache-target.c ++++ 
b/drivers/md/dm-cache-target.c +@@ -1940,7 +1940,7 @@ static void cache_dtr(struct dm_target *ti) + + static sector_t get_dev_size(struct dm_dev *dev) + { +- return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; ++ return bdev_nr_sectors(dev->bdev); + } + + /*----------------------------------------------------------------*/ +diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c +index edd22e4d65df..4599632d7a84 100644 +--- a/drivers/md/dm-clone-target.c ++++ b/drivers/md/dm-clone-target.c +@@ -1514,7 +1514,7 @@ static void clone_status(struct dm_target *ti, status_type_t type, + + static sector_t get_dev_size(struct dm_dev *dev) + { +- return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; ++ return bdev_nr_sectors(dev->bdev); + } + + /*---------------------------------------------------------------------------*/ +diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c +index 3163e2b1418e..03672204b0e3 100644 +--- a/drivers/md/dm-dust.c ++++ b/drivers/md/dm-dust.c +@@ -415,7 +415,7 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv, + char *result, unsigned int maxlen) + { + struct dust_device *dd = ti->private; +- sector_t size = i_size_read(dd->dev->bdev->bd_inode) >> SECTOR_SHIFT; ++ sector_t size = bdev_nr_sectors(dd->dev->bdev); + bool invalid_msg = false; + int r = -EINVAL; + unsigned long long tmp, block; +@@ -544,8 +544,7 @@ static int dust_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) + /* + * Only pass ioctls through if the device sizes match exactly. + */ +- if (dd->start || +- ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) ++ if (dd->start || ti->len != bdev_nr_sectors(dev->bdev)) + return 1; + + return 0; +diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c +index d25989660a76..7ce5d509b940 100644 +--- a/drivers/md/dm-ebs-target.c ++++ b/drivers/md/dm-ebs-target.c +@@ -416,7 +416,7 @@ static int ebs_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) + * Only pass ioctls through if the device sizes match exactly. 
+ */ + *bdev = dev->bdev; +- return !!(ec->start || ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT); ++ return !!(ec->start || ti->len != bdev_nr_sectors(dev->bdev)); + } + + static void ebs_io_hints(struct dm_target *ti, struct queue_limits *limits) +diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c +index 2a78f6874143..1f6bf152b3c7 100644 +--- a/drivers/md/dm-era-target.c ++++ b/drivers/md/dm-era-target.c +@@ -1681,7 +1681,7 @@ static int era_message(struct dm_target *ti, unsigned argc, char **argv, + + static sector_t get_dev_size(struct dm_dev *dev) + { +- return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; ++ return bdev_nr_sectors(dev->bdev); + } + + static int era_iterate_devices(struct dm_target *ti, +diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h +index 3f4139ac1f60..b5f20eba3641 100644 +--- a/drivers/md/dm-exception-store.h ++++ b/drivers/md/dm-exception-store.h +@@ -168,7 +168,7 @@ static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e) + */ + static inline sector_t get_dev_size(struct block_device *bdev) + { +- return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; ++ return bdev_nr_sectors(bdev); + } + + static inline chunk_t sector_to_chunk(struct dm_exception_store *store, +diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c +index 4b94ffe6f2d4..345229d7e59c 100644 +--- a/drivers/md/dm-flakey.c ++++ b/drivers/md/dm-flakey.c +@@ -456,8 +456,7 @@ static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev + /* + * Only pass ioctls through if the device sizes match exactly. + */ +- if (fc->start || +- ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT) ++ if (fc->start || ti->len != bdev_nr_sectors((*bdev))) + return 1; + return 0; + } +diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c +index dc03b70f6e65..d0f788e72abf 100644 +--- a/drivers/md/dm-integrity.c ++++ b/drivers/md/dm-integrity.c +@@ -4113,11 +4113,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) + } + } + +- ic->data_device_sectors = i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT; ++ ic->data_device_sectors = bdev_nr_sectors(ic->dev->bdev); + if (!ic->meta_dev) + ic->meta_device_sectors = ic->data_device_sectors; + else +- ic->meta_device_sectors = i_size_read(ic->meta_dev->bdev->bd_inode) >> SECTOR_SHIFT; ++ ic->meta_device_sectors = bdev_nr_sectors(ic->meta_dev->bdev); + + if (!journal_sectors) { + journal_sectors = min((sector_t)DEFAULT_MAX_JOURNAL_SECTORS, +@@ -4367,7 +4367,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) + DEBUG_print(" journal_sections %u\n", (unsigned)le32_to_cpu(ic->sb->journal_sections)); + DEBUG_print(" journal_entries %u\n", ic->journal_entries); + DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors); +- DEBUG_print(" data_device_sectors 0x%llx\n", i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT); ++ DEBUG_print(" data_device_sectors 0x%llx\n", bdev_nr_sectors(ic->dev->bdev)); + DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors); + DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run); + DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run); +diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c +index 679b4c0a2eea..66ba16713f69 100644 +--- a/drivers/md/dm-linear.c ++++ b/drivers/md/dm-linear.c +@@ -135,8 +135,7 @@ static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev + /* + * Only pass ioctls 
through if the device sizes match exactly. + */ +- if (lc->start || +- ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) ++ if (lc->start || ti->len != bdev_nr_sectors(dev->bdev)) + return 1; + return 0; + } +diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c +index d93a4db23512..46de085a9670 100644 +--- a/drivers/md/dm-log-writes.c ++++ b/drivers/md/dm-log-writes.c +@@ -446,7 +446,7 @@ static int log_super(struct log_writes_c *lc) + + static inline sector_t logdev_last_sector(struct log_writes_c *lc) + { +- return i_size_read(lc->logdev->bdev->bd_inode) >> SECTOR_SHIFT; ++ return bdev_nr_sectors(lc->logdev->bdev); + } + + static int log_writes_kthread(void *arg) +@@ -851,7 +851,7 @@ static int log_writes_prepare_ioctl(struct dm_target *ti, + /* + * Only pass ioctls through if the device sizes match exactly. + */ +- if (ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) ++ if (ti->len != bdev_nr_sectors(dev->bdev)) + return 1; + return 0; + } +diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c +index 1ecf75ef276a..06f328928a7f 100644 +--- a/drivers/md/dm-log.c ++++ b/drivers/md/dm-log.c +@@ -447,7 +447,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, + bdev_logical_block_size(lc->header_location. + bdev)); + +- if (buf_size > i_size_read(dev->bdev->bd_inode)) { ++ if (buf_size > bdev_nr_bytes(dev->bdev)) { + DMWARN("log device %s too small: need %llu bytes", + dev->name, (unsigned long long)buf_size); + kfree(lc); +diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c +index 694aaca4eea2..5794f5415155 100644 +--- a/drivers/md/dm-mpath.c ++++ b/drivers/md/dm-mpath.c +@@ -2061,7 +2061,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti, + /* + * Only pass ioctls through if the device sizes match exactly. + */ +- if (!r && ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT) ++ if (!r && ti->len != bdev_nr_sectors((*bdev))) + return 1; + return r; + } +diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c +index d9ef52159a22..2b26435a6946 100644 +--- a/drivers/md/dm-raid.c ++++ b/drivers/md/dm-raid.c +@@ -1261,7 +1261,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as, + md_rdev_init(jdev); + jdev->mddev = &rs->md; + jdev->bdev = rs->journal_dev.dev->bdev; +- jdev->sectors = to_sector(i_size_read(jdev->bdev->bd_inode)); ++ jdev->sectors = bdev_nr_sectors(jdev->bdev); + if (jdev->sectors < MIN_RAID456_JOURNAL_SPACE) { + rs->ti->error = "No space for raid4/5/6 journal"; + return -ENOSPC; +@@ -1607,7 +1607,7 @@ static int _check_data_dev_sectors(struct raid_set *rs) + + rdev_for_each(rdev, &rs->md) + if (!test_bit(Journal, &rdev->flags) && rdev->bdev) { +- ds = min(ds, to_sector(i_size_read(rdev->bdev->bd_inode))); ++ ds = min(ds, bdev_nr_sectors(rdev->bdev)); + if (ds < rs->md.dev_sectors) { + rs->ti->error = "Component device(s) too small"; + return -EINVAL; +@@ -2662,7 +2662,7 @@ static int rs_adjust_data_offsets(struct raid_set *rs) + * Make sure we got a minimum amount of free sectors per device + */ + if (rs->data_offset && +- to_sector(i_size_read(rdev->bdev->bd_inode)) - rs->md.dev_sectors < MIN_FREE_RESHAPE_SPACE) { ++ bdev_nr_sectors(rdev->bdev) - rs->md.dev_sectors < MIN_FREE_RESHAPE_SPACE) { + rs->ti->error = data_offset ? 
"No space for forward reshape" : + "No space for backward reshape"; + return -ENOSPC; +diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c +index 028a92ff6d57..534dc2ca8bb0 100644 +--- a/drivers/md/dm-switch.c ++++ b/drivers/md/dm-switch.c +@@ -529,7 +529,7 @@ static int switch_prepare_ioctl(struct dm_target *ti, struct block_device **bdev + * Only pass ioctls through if the device sizes match exactly. + */ + if (ti->len + sctx->path_list[path_nr].start != +- i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT) ++ bdev_nr_sectors((*bdev))) + return 1; + return 0; + } +diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c +index 1fa4d5582dca..d95142102bd2 100644 +--- a/drivers/md/dm-table.c ++++ b/drivers/md/dm-table.c +@@ -227,8 +227,7 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, + { + struct queue_limits *limits = data; + struct block_device *bdev = dev->bdev; +- sector_t dev_size = +- i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; ++ sector_t dev_size = bdev_nr_sectors(bdev); + unsigned short logical_block_size_sectors = + limits->logical_block_size >> SECTOR_SHIFT; + char b[BDEVNAME_SIZE]; +diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c +index c88ed14d49e6..1a96a07cbf44 100644 +--- a/drivers/md/dm-thin-metadata.c ++++ b/drivers/md/dm-thin-metadata.c +@@ -549,7 +549,7 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) + int r; + struct dm_block *sblock; + struct thin_disk_superblock *disk_super; +- sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT; ++ sector_t bdev_size = bdev_nr_sectors(pmd->bdev); + + if (bdev_size > THIN_METADATA_MAX_SECTORS) + bdev_size = THIN_METADATA_MAX_SECTORS; +diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c +index 4c67b77c23c1..ec119d2422d5 100644 +--- a/drivers/md/dm-thin.c ++++ b/drivers/md/dm-thin.c +@@ -3212,7 +3212,7 @@ static int metadata_pre_commit_callback(void *context) + + static sector_t get_dev_size(struct block_device *bdev) + { +- return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; ++ return bdev_nr_sectors(bdev); + } + + static void warn_if_metadata_device_too_big(struct block_device *bdev) +diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c +index aae48a8b1a04..a7efe83aad29 100644 +--- a/drivers/md/dm-verity-target.c ++++ b/drivers/md/dm-verity-target.c +@@ -834,8 +834,7 @@ static int verity_prepare_ioctl(struct dm_target *ti, struct block_device **bdev + + *bdev = v->data_dev->bdev; + +- if (v->data_start || +- ti->len != i_size_read(v->data_dev->bdev->bd_inode) >> SECTOR_SHIFT) ++ if (v->data_start || ti->len != bdev_nr_sectors(v->data_dev->bdev)) + return 1; + return 0; + } +diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c +index 18320444fb0a..017806096b91 100644 +--- a/drivers/md/dm-writecache.c ++++ b/drivers/md/dm-writecache.c +@@ -2341,7 +2341,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) + ti->error = "Cache data device lookup failed"; + goto bad; + } +- wc->memory_map_size = i_size_read(wc->ssd_dev->bdev->bd_inode); ++ wc->memory_map_size = bdev_nr_bytes(wc->ssd_dev->bdev); + + /* + * Parse the cache block size +diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c +index ae1bc48c0043..8dc21c09329f 100644 +--- a/drivers/md/dm-zoned-target.c ++++ b/drivers/md/dm-zoned-target.c +@@ -733,7 +733,7 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path, + dev->dev_idx = idx; + (void)bdevname(dev->bdev, dev->name); + 
+- dev->capacity = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; ++ dev->capacity = bdev_nr_sectors(bdev); + if (ti->begin) { + ti->error = "Partial mapping is not supported"; + goto err; +-- +2.35.3 + diff --git a/patches.suse/dmaengine-idxd-Do-not-enable-user-type-Work-Queue-wi.patch b/patches.suse/dmaengine-idxd-Do-not-enable-user-type-Work-Queue-wi.patch new file mode 100644 index 0000000..b4d4d56 --- /dev/null +++ b/patches.suse/dmaengine-idxd-Do-not-enable-user-type-Work-Queue-wi.patch @@ -0,0 +1,78 @@ +From 0ec8ce07394442d722806fe61b901a5b2b17249d Mon Sep 17 00:00:00 2001 +From: Fenghua Yu +Date: Fri, 14 Oct 2022 15:25:41 -0700 +Subject: [PATCH] dmaengine: idxd: Do not enable user type Work Queue without Shared Virtual Addressing +Git-commit: 0ec8ce07394442d722806fe61b901a5b2b17249d +Patch-mainline: v6.1-rc5 +References: jsc#PED-2681 + +When the idxd_user_drv driver is bound to a Work Queue (WQ) device +without IOMMU or with IOMMU Passthrough without Shared Virtual +Addressing (SVA), the application gains direct access to physical +memory via the device by programming physical address to a submitted +descriptor. This allows direct userspace read and write access to +arbitrary physical memory. This is inconsistent with the security +goals of a good kernel API. + +Unlike vfio_pci driver, the IDXD char device driver does not provide any +ways to pin user pages and translate the address from user VA to IOVA or +PA without IOMMU SVA. Therefore the application has no way to instruct the +device to perform DMA function. This makes the char device not usable for +normal application usage. + +Since user type WQ without SVA cannot be used for normal application usage +and presents the security issue, bind idxd_user_drv driver and enable user +type WQ only when SVA is enabled (i.e. user PASID is enabled). + +Fixes: 448c3de8ac83 ("dmaengine: idxd: create user driver for wq 'device'") +Cc: stable@vger.kernel.org +Suggested-by: Arjan Van De Ven +Signed-off-by: Fenghua Yu +Reviewed-by: Dave Jiang +Reviewed-by: Jerry Snitselaar +Link: https://lore.kernel.org/r/20221014222541.3912195-1-fenghua.yu@intel.com +Signed-off-by: Vinod Koul +Acked-by: Takashi Iwai + +--- + drivers/dma/idxd/cdev.c | 18 ++++++++++++++++++ + include/uapi/linux/idxd.h | 1 + + 2 files changed, 19 insertions(+) + +--- a/drivers/dma/idxd/cdev.c ++++ b/drivers/dma/idxd/cdev.c +@@ -312,6 +312,24 @@ static int idxd_user_drv_probe(struct id + if (idxd->state != IDXD_DEV_ENABLED) + return -ENXIO; + ++ /* ++ * User type WQ is enabled only when SVA is enabled for two reasons: ++ * - If no IOMMU or IOMMU Passthrough without SVA, userspace ++ * can directly access physical address through the WQ. ++ * - The IDXD cdev driver does not provide any ways to pin ++ * user pages and translate the address from user VA to IOVA or ++ * PA without IOMMU SVA. Therefore the application has no way ++ * to instruct the device to perform DMA function. This makes ++ * the cdev not usable for normal application usage. 
++ */ ++ if (!device_user_pasid_enabled(idxd)) { ++ idxd->cmd_status = IDXD_SCMD_WQ_USER_NO_IOMMU; ++ dev_dbg(&idxd->pdev->dev, ++ "User type WQ cannot be enabled without SVA.\n"); ++ ++ return -EOPNOTSUPP; ++ } ++ + mutex_lock(&wq->wq_lock); + wq->type = IDXD_WQT_USER; + rc = __drv_enable_wq(wq); +--- a/include/uapi/linux/idxd.h ++++ b/include/uapi/linux/idxd.h +@@ -29,6 +29,7 @@ enum idxd_scmd_stat { + IDXD_SCMD_WQ_NO_SIZE = 0x800e0000, + IDXD_SCMD_WQ_NO_PRIV = 0x800f0000, + IDXD_SCMD_WQ_IRQ_ERR = 0x80100000, ++ IDXD_SCMD_WQ_USER_NO_IOMMU = 0x80110000, + }; + + #define IDXD_SCMD_SOFTERR_MASK 0x80000000 diff --git a/patches.suse/dmaengine-idxd-Only-call-idxd_enable_system_pasid-if.patch b/patches.suse/dmaengine-idxd-Only-call-idxd_enable_system_pasid-if.patch new file mode 100644 index 0000000..136e5c5 --- /dev/null +++ b/patches.suse/dmaengine-idxd-Only-call-idxd_enable_system_pasid-if.patch @@ -0,0 +1,98 @@ +From 8ffccd119a5908b240a26182be44c0ff3d1e3d85 Mon Sep 17 00:00:00 2001 +From: Jerry Snitselaar +Date: Sat, 25 Jun 2022 22:16:48 -0700 +Subject: [PATCH] dmaengine: idxd: Only call idxd_enable_system_pasid() if succeeded in enabling SVA feature +Git-commit: 8ffccd119a5908b240a26182be44c0ff3d1e3d85 +Patch-mainline: v5.19-rc6 +References: jsc#PED-2681 + +On a Sapphire Rapids system if boot without intel_iommu=on, the IDXD +driver will crash during probe in iommu_sva_bind_device(). + +[ 21.423729] BUG: kernel NULL pointer dereference, address: 0000000000000038 +[ 21.445108] #PF: supervisor read access in kernel mode +[ 21.450912] #PF: error_code(0x0000) - not-present page +[ 21.456706] PGD 0 +[ 21.459047] Oops: 0000 [#1] PREEMPT SMP NOPTI +[ 21.464004] CPU: 0 PID: 1420 Comm: kworker/0:3 Not tainted 5.19.0-0.rc3.27.eln120.x86_64 #1 +[ 21.464011] Hardware name: Intel Corporation EAGLESTREAM/EAGLESTREAM, BIOS EGSDCRB1.SYS.0067.D12.2110190954 10/19/2021 +[ 21.464015] Workqueue: events work_for_cpu_fn +[ 21.464030] RIP: 0010:iommu_sva_bind_device+0x1d/0xe0 +[ 21.464046] Code: c3 cc 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 57 41 56 49 89 d6 41 55 41 54 55 53 48 83 ec 08 48 8b 87 d8 02 00 00 <48> 8b 40 38 48 8b 50 10 48 83 7a 70 00 48 89 14 24 0f 84 91 00 00 +[ 21.464050] RSP: 0018:ff7245d9096b7db8 EFLAGS: 00010296 +[ 21.464054] RAX: 0000000000000000 RBX: ff1eadeec8a51000 RCX: 0000000000000000 +[ 21.464058] RDX: ff7245d9096b7e24 RSI: 0000000000000000 RDI: ff1eadeec8a510d0 +[ 21.464060] RBP: ff1eadeec8a51000 R08: ffffffffb1a12300 R09: ff1eadffbfce25b4 +[ 21.464062] R10: ffffffffffffffff R11: 0000000000000038 R12: ffffffffc09f8000 +[ 21.464065] R13: ff1eadeec8a510d0 R14: ff7245d9096b7e24 R15: ff1eaddf54429000 +[ 21.464067] FS: 0000000000000000(0000) GS:ff1eadee7f600000(0000) knlGS:0000000000000000 +[ 21.464070] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 21.464072] CR2: 0000000000000038 CR3: 00000008c0e10006 CR4: 0000000000771ef0 +[ 21.464074] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 21.464076] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 +[ 21.464078] PKRU: 55555554 +[ 21.464079] Call Trace: +[ 21.464083] +[ 21.464092] idxd_pci_probe+0x259/0x1070 [idxd] +[ 21.464121] local_pci_probe+0x3e/0x80 +[ 21.464132] work_for_cpu_fn+0x13/0x20 +[ 21.464136] process_one_work+0x1c4/0x380 +[ 21.464143] worker_thread+0x1ab/0x380 +[ 21.464147] ? _raw_spin_lock_irqsave+0x23/0x50 +[ 21.464158] ? process_one_work+0x380/0x380 +[ 21.464161] kthread+0xe6/0x110 +[ 21.464168] ? 
kthread_complete_and_exit+0x20/0x20 +[ 21.464172] ret_from_fork+0x1f/0x30 + +iommu_sva_bind_device() requires SVA has been enabled successfully on +the IDXD device before it's called. Otherwise, iommu_sva_bind_device() +will access a NULL pointer. If Intel IOMMU is disabled, SVA cannot be +enabled and thus idxd_enable_system_pasid() and iommu_sva_bind_device() +should not be called. + +Fixes: 42a1b73852c4 ("dmaengine: idxd: Separate user and kernel pasid enabling") +Cc: Vinod Koul +Cc: linux-kernel@vger.kernel.org +Cc: Dave Jiang +Cc: Fenghua Yu +Link: https://lore.kernel.org/dmaengine/20220623170232.6whonfjuh3m5vcoy@cantor/ +Signed-off-by: Jerry Snitselaar +Acked-by: Fenghua Yu +Link: https://lore.kernel.org/r/20220626051648.14249-1-jsnitsel@redhat.com +Signed-off-by: Vinod Koul +Acked-by: Takashi Iwai + +--- + drivers/dma/idxd/init.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c +index 355fb3ef4cbf..aa3478257ddb 100644 +--- a/drivers/dma/idxd/init.c ++++ b/drivers/dma/idxd/init.c +@@ -512,15 +512,16 @@ static int idxd_probe(struct idxd_device *idxd) + dev_dbg(dev, "IDXD reset complete\n"); + + if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) { +- if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) ++ if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) { + dev_warn(dev, "Unable to turn on user SVA feature.\n"); +- else ++ } else { + set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags); + +- if (idxd_enable_system_pasid(idxd)) +- dev_warn(dev, "No in-kernel DMA with PASID.\n"); +- else +- set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); ++ if (idxd_enable_system_pasid(idxd)) ++ dev_warn(dev, "No in-kernel DMA with PASID.\n"); ++ else ++ set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); ++ } + } else if (!sva) { + dev_warn(dev, "User forced SVA off via module param.\n"); + } +-- +2.35.3 + diff --git a/patches.suse/dmaengine-idxd-Separate-user-and-kernel-pasid-enabli.patch b/patches.suse/dmaengine-idxd-Separate-user-and-kernel-pasid-enabli.patch new file mode 100644 index 0000000..3151949 --- /dev/null +++ b/patches.suse/dmaengine-idxd-Separate-user-and-kernel-pasid-enabli.patch @@ -0,0 +1,192 @@ +From 42a1b73852c4a176d233a192422b5e1d0ba67cbf Mon Sep 17 00:00:00 2001 +From: Dave Jiang +Date: Wed, 11 May 2022 17:11:57 -0700 +Subject: [PATCH] dmaengine: idxd: Separate user and kernel pasid enabling +Git-commit: 42a1b73852c4a176d233a192422b5e1d0ba67cbf +Patch-mainline: v5.19-rc1 +References: jsc#PED-2681 + +The idxd driver always gated the pasid enabling under a single knob and +this assumption is incorrect. The pasid used for kernel operation can be +independently toggled and has no dependency on the user pasid (and vice +versa). Split the two so they are independent "enabled" flags. 
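
The crux of the split is the new per-WQ helper, quoted from the idxd.h hunk below: a WQ's effective PASID state now follows the flag matching its type instead of a single global knob.

	static inline bool wq_pasid_enabled(struct idxd_wq *wq)
	{
		return (is_idxd_wq_kernel(wq) && device_pasid_enabled(wq->idxd)) ||
		       (is_idxd_wq_user(wq) && device_user_pasid_enabled(wq->idxd));
	}
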
+ +Signed-off-by: Dave Jiang +Link: https://lore.kernel.org/r/165231431746.986466.5666862038354800551.stgit@djiang5-desk3.ch.intel.com +Signed-off-by: Vinod Koul +Acked-by: Takashi Iwai + +--- + drivers/dma/idxd/cdev.c | 4 ++-- + drivers/dma/idxd/device.c | 6 +++--- + drivers/dma/idxd/idxd.h | 16 ++++++++++++++-- + drivers/dma/idxd/init.c | 30 +++++++++++++++--------------- + drivers/dma/idxd/sysfs.c | 2 +- + 5 files changed, 35 insertions(+), 23 deletions(-) + +diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c +index bd44293804d1..c2808fd081d6 100644 +--- a/drivers/dma/idxd/cdev.c ++++ b/drivers/dma/idxd/cdev.c +@@ -99,7 +99,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) + ctx->wq = wq; + filp->private_data = ctx; + +- if (device_pasid_enabled(idxd)) { ++ if (device_user_pasid_enabled(idxd)) { + sva = iommu_sva_bind_device(dev, current->mm, NULL); + if (IS_ERR(sva)) { + rc = PTR_ERR(sva); +@@ -152,7 +152,7 @@ static int idxd_cdev_release(struct inode *node, struct file *filep) + if (wq_shared(wq)) { + idxd_device_drain_pasid(idxd, ctx->pasid); + } else { +- if (device_pasid_enabled(idxd)) { ++ if (device_user_pasid_enabled(idxd)) { + /* The wq disable in the disable pasid function will drain the wq */ + rc = idxd_wq_disable_pasid(wq); + if (rc < 0) +diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c +index 22ad9ee383e2..49ee36038cca 100644 +--- a/drivers/dma/idxd/device.c ++++ b/drivers/dma/idxd/device.c +@@ -966,7 +966,7 @@ static int idxd_wqs_setup(struct idxd_device *idxd) + if (!wq->group) + continue; + +- if (wq_shared(wq) && !device_swq_supported(idxd)) { ++ if (wq_shared(wq) && !wq_shared_supported(wq)) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT; + dev_warn(dev, "No shared wq support but configured.\n"); + return -EINVAL; +@@ -1264,7 +1264,7 @@ int drv_enable_wq(struct idxd_wq *wq) + + /* Shared WQ checks */ + if (wq_shared(wq)) { +- if (!device_swq_supported(idxd)) { ++ if (!wq_shared_supported(wq)) { + idxd->cmd_status = IDXD_SCMD_WQ_NO_SVM; + dev_dbg(dev, "PASID not enabled and shared wq.\n"); + goto err; +@@ -1294,7 +1294,7 @@ int drv_enable_wq(struct idxd_wq *wq) + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { + int priv = 0; + +- if (device_pasid_enabled(idxd)) { ++ if (wq_pasid_enabled(wq)) { + if (is_idxd_wq_kernel(wq) || wq_shared(wq)) { + u32 pasid = wq_dedicated(wq) ? 
idxd->pasid : 0; + +diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h +index 8e03fb548d13..77d241a92bd1 100644 +--- a/drivers/dma/idxd/idxd.h ++++ b/drivers/dma/idxd/idxd.h +@@ -239,6 +239,7 @@ enum idxd_device_flag { + IDXD_FLAG_CONFIGURABLE = 0, + IDXD_FLAG_CMD_RUNNING, + IDXD_FLAG_PASID_ENABLED, ++ IDXD_FLAG_USER_PASID_ENABLED, + }; + + struct idxd_dma_dev { +@@ -469,9 +470,20 @@ static inline bool device_pasid_enabled(struct idxd_device *idxd) + return test_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); + } + +-static inline bool device_swq_supported(struct idxd_device *idxd) ++static inline bool device_user_pasid_enabled(struct idxd_device *idxd) + { +- return (support_enqcmd && device_pasid_enabled(idxd)); ++ return test_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags); ++} ++ ++static inline bool wq_pasid_enabled(struct idxd_wq *wq) ++{ ++ return (is_idxd_wq_kernel(wq) && device_pasid_enabled(wq->idxd)) || ++ (is_idxd_wq_user(wq) && device_user_pasid_enabled(wq->idxd)); ++} ++ ++static inline bool wq_shared_supported(struct idxd_wq *wq) ++{ ++ return (support_enqcmd && wq_pasid_enabled(wq)); + } + + enum idxd_portal_prot { +diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c +index 993a5dcca24f..355fb3ef4cbf 100644 +--- a/drivers/dma/idxd/init.c ++++ b/drivers/dma/idxd/init.c +@@ -512,18 +512,15 @@ static int idxd_probe(struct idxd_device *idxd) + dev_dbg(dev, "IDXD reset complete\n"); + + if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) { +- rc = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA); +- if (rc == 0) { +- rc = idxd_enable_system_pasid(idxd); +- if (rc < 0) { +- iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); +- dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc); +- } else { +- set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); +- } +- } else { +- dev_warn(dev, "Unable to turn on SVA feature.\n"); +- } ++ if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) ++ dev_warn(dev, "Unable to turn on user SVA feature.\n"); ++ else ++ set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags); ++ ++ if (idxd_enable_system_pasid(idxd)) ++ dev_warn(dev, "No in-kernel DMA with PASID.\n"); ++ else ++ set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); + } else if (!sva) { + dev_warn(dev, "User forced SVA off via module param.\n"); + } +@@ -561,7 +558,8 @@ static int idxd_probe(struct idxd_device *idxd) + err: + if (device_pasid_enabled(idxd)) + idxd_disable_system_pasid(idxd); +- iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); ++ if (device_user_pasid_enabled(idxd)) ++ iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); + return rc; + } + +@@ -574,7 +572,8 @@ static void idxd_cleanup(struct idxd_device *idxd) + idxd_cleanup_internals(idxd); + if (device_pasid_enabled(idxd)) + idxd_disable_system_pasid(idxd); +- iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); ++ if (device_user_pasid_enabled(idxd)) ++ iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); + } + + static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +@@ -691,7 +690,8 @@ static void idxd_remove(struct pci_dev *pdev) + free_irq(irq_entry->vector, irq_entry); + pci_free_irq_vectors(pdev); + pci_iounmap(pdev, idxd->reg_base); +- iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); ++ if (device_user_pasid_enabled(idxd)) ++ iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA); + pci_disable_device(pdev); + destroy_workqueue(idxd->wq); + perfmon_pmu_remove(idxd); +diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c +index 
7e628e31ce24..d482e708f0fa 100644 +--- a/drivers/dma/idxd/sysfs.c ++++ b/drivers/dma/idxd/sysfs.c +@@ -588,7 +588,7 @@ static ssize_t wq_mode_store(struct device *dev, + if (sysfs_streq(buf, "dedicated")) { + set_bit(WQ_FLAG_DEDICATED, &wq->flags); + wq->threshold = 0; +- } else if (sysfs_streq(buf, "shared") && device_swq_supported(idxd)) { ++ } else if (sysfs_streq(buf, "shared")) { + clear_bit(WQ_FLAG_DEDICATED, &wq->flags); + } else { + return -EINVAL; +-- +2.35.3 + diff --git a/patches.suse/dmaengine-idxd-don-t-load-pasid-config-until-needed.patch b/patches.suse/dmaengine-idxd-don-t-load-pasid-config-until-needed.patch new file mode 100644 index 0000000..96acffb --- /dev/null +++ b/patches.suse/dmaengine-idxd-don-t-load-pasid-config-until-needed.patch @@ -0,0 +1,161 @@ +From 3157dd0a366183adaea2f4d8721961637d562fee Mon Sep 17 00:00:00 2001 +From: Dave Jiang +Date: Thu, 7 Apr 2022 11:28:28 -0700 +Subject: [PATCH] dmaengine: idxd: don't load pasid config until needed +Git-commit: 3157dd0a366183adaea2f4d8721961637d562fee +Patch-mainline: v5.19-rc1 +References: jsc#PED-2681 + +The driver currently programs the system pasid to the WQ preemptively when +system pasid is enabled. Given that a dwq will reprogram the pasid and +possibly a different pasid, the programming is not necessary. The pasid_en +bit can be set for swq as it does not need pasid programming but +needs the pasid_en bit. Remove system pasid programming on device config +write. Add pasid programming for kernel wq type on wq driver enable. The +char dev driver already reprograms the dwq on ->open() call so there's no +change. + +Signed-off-by: Dave Jiang +Link: https://lore.kernel.org/r/164935607115.1660372.6734518676950372366.stgit@djiang5-desk3.ch.intel.com +Signed-off-by: Vinod Koul +Acked-by: Takashi Iwai + +--- + drivers/dma/idxd/device.c | 66 ++++++++++++++++++++++++++++-------- + drivers/dma/idxd/registers.h | 1 + + 2 files changed, 53 insertions(+), 14 deletions(-) + +diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c +index 3061fe857d69..2903f8bb30e1 100644 +--- a/drivers/dma/idxd/device.c ++++ b/drivers/dma/idxd/device.c +@@ -299,24 +299,46 @@ void idxd_wqs_unmap_portal(struct idxd_device *idxd) + } + } + +-int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid) ++static void __idxd_wq_set_priv_locked(struct idxd_wq *wq, int priv) + { + struct idxd_device *idxd = wq->idxd; +- int rc; + union wqcfg wqcfg; + unsigned int offset; + +- rc = idxd_wq_disable(wq, false); +- if (rc < 0) +- return rc; ++ offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PRIVL_IDX); ++ spin_lock(&idxd->dev_lock); ++ wqcfg.bits[WQCFG_PRIVL_IDX] = ioread32(idxd->reg_base + offset); ++ wqcfg.priv = priv; ++ wq->wqcfg->bits[WQCFG_PRIVL_IDX] = wqcfg.bits[WQCFG_PRIVL_IDX]; ++ iowrite32(wqcfg.bits[WQCFG_PRIVL_IDX], idxd->reg_base + offset); ++ spin_unlock(&idxd->dev_lock); ++} ++ ++static void __idxd_wq_set_pasid_locked(struct idxd_wq *wq, int pasid) ++{ ++ struct idxd_device *idxd = wq->idxd; ++ union wqcfg wqcfg; ++ unsigned int offset; + + offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX); + spin_lock(&idxd->dev_lock); + wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset); + wqcfg.pasid_en = 1; + wqcfg.pasid = pasid; ++ wq->wqcfg->bits[WQCFG_PASID_IDX] = wqcfg.bits[WQCFG_PASID_IDX]; + iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset); + spin_unlock(&idxd->dev_lock); ++} ++ ++int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid) ++{ ++ int rc; ++ ++ rc = idxd_wq_disable(wq, false); ++ if (rc < 0) ++ return rc; ++ ++ 
__idxd_wq_set_pasid_locked(wq, pasid); + + rc = idxd_wq_enable(wq); + if (rc < 0) +@@ -797,7 +819,7 @@ static int idxd_wq_config_write(struct idxd_wq *wq) + */ + for (i = 0; i < WQCFG_STRIDES(idxd); i++) { + wq_offset = WQCFG_OFFSET(idxd, wq->id, i); +- wq->wqcfg->bits[i] = ioread32(idxd->reg_base + wq_offset); ++ wq->wqcfg->bits[i] |= ioread32(idxd->reg_base + wq_offset); + } + + if (wq->size == 0 && wq->type != IDXD_WQT_NONE) +@@ -813,14 +835,8 @@ static int idxd_wq_config_write(struct idxd_wq *wq) + if (wq_dedicated(wq)) + wq->wqcfg->mode = 1; + +- if (device_pasid_enabled(idxd)) { +- wq->wqcfg->pasid_en = 1; +- if (wq->type == IDXD_WQT_KERNEL && wq_dedicated(wq)) +- wq->wqcfg->pasid = idxd->pasid; +- } +- + /* +- * Here the priv bit is set depending on the WQ type. priv = 1 if the ++ * The WQ priv bit is set depending on the WQ type. priv = 1 if the + * WQ type is kernel to indicate privileged access. This setting only + * matters for dedicated WQ. According to the DSA spec: + * If the WQ is in dedicated mode, WQ PASID Enable is 1, and the +@@ -830,7 +846,6 @@ static int idxd_wq_config_write(struct idxd_wq *wq) + * In the case of a dedicated kernel WQ that is not able to support + * the PASID cap, then the configuration will be rejected. + */ +- wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL); + if (wq_dedicated(wq) && wq->wqcfg->pasid_en && + !idxd_device_pasid_priv_enabled(idxd) && + wq->type == IDXD_WQT_KERNEL) { +@@ -1263,6 +1278,29 @@ int __drv_enable_wq(struct idxd_wq *wq) + } + } + ++ /* ++ * In the event that the WQ is configurable for pasid and priv bits. ++ * For kernel wq, the driver should setup the pasid, pasid_en, and priv bit. ++ * However, for non-kernel wq, the driver should only set the pasid_en bit for ++ * shared wq. A dedicated wq that is not 'kernel' type will configure pasid and ++ * pasid_en later on so there is no need to setup. ++ */ ++ if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { ++ int priv = 0; ++ ++ if (device_pasid_enabled(idxd)) { ++ if (is_idxd_wq_kernel(wq) || wq_shared(wq)) { ++ u32 pasid = wq_dedicated(wq) ? 
idxd->pasid : 0; ++ ++ __idxd_wq_set_pasid_locked(wq, pasid); ++ } ++ } ++ ++ if (is_idxd_wq_kernel(wq)) ++ priv = 1; ++ __idxd_wq_set_priv_locked(wq, priv); ++ } ++ + rc = 0; + spin_lock(&idxd->dev_lock); + if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) +diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h +index aa642aecdc0b..02449aa9c454 100644 +--- a/drivers/dma/idxd/registers.h ++++ b/drivers/dma/idxd/registers.h +@@ -353,6 +353,7 @@ union wqcfg { + } __packed; + + #define WQCFG_PASID_IDX 2 ++#define WQCFG_PRIVL_IDX 2 + #define WQCFG_OCCUP_IDX 6 + + #define WQCFG_OCCUP_MASK 0xffff +-- +2.35.3 + diff --git a/patches.suse/dmaengine-tegra-Fix-build-error-without-IOMMU_API.patch b/patches.suse/dmaengine-tegra-Fix-build-error-without-IOMMU_API.patch new file mode 100644 index 0000000..05780bd --- /dev/null +++ b/patches.suse/dmaengine-tegra-Fix-build-error-without-IOMMU_API.patch @@ -0,0 +1,43 @@ +From 2cdd3ca67aeabf4d6274af2d1c7e22f17a33dd64 Mon Sep 17 00:00:00 2001 +From: YueHaibing +Date: Thu, 5 May 2022 17:32:36 +0800 +Subject: [PATCH] dmaengine: tegra: Fix build error without IOMMU_API +Mime-version: 1.0 +Content-type: text/plain; charset=UTF-8 +Content-transfer-encoding: 8bit +Git-commit: 2cdd3ca67aeabf4d6274af2d1c7e22f17a33dd64 +Patch-mainline: v5.19-rc1 +References: git-fixes + +Drivers/dma/tegra186-gpc-dma.c: In function ‘tegra_dma_probe’: +drivers/dma/tegra186-gpc-dma.c:1364:24: error: ‘struct iommu_fwspec’ has no member named ‘ids’ + stream_id = iommu_spec->ids[0] & 0xffff; + ^~ + +Make TEGRA186_GPC_DMA depends on IOMMU_API to fix this. + +Fixes: ee17028009d4 ("dmaengine: tegra: Add tegra gpcdma driver") +Signed-off-by: YueHaibing +Link: https://lore.kernel.org/r/20220505093236.15076-1-yuehaibing@huawei.com +Signed-off-by: Vinod Koul +Acked-by: Takashi Iwai + +--- + drivers/dma/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig +index cc1464e4acde..857b2174a4cb 100644 +--- a/drivers/dma/Kconfig ++++ b/drivers/dma/Kconfig +@@ -632,6 +632,7 @@ config TXX9_DMAC + config TEGRA186_GPC_DMA + tristate "NVIDIA Tegra GPC DMA support" + depends on (ARCH_TEGRA || COMPILE_TEST) && ARCH_DMA_ADDR_T_64BIT ++ depends on IOMMU_API + select DMA_ENGINE + help + Support for the NVIDIA Tegra General Purpose Central DMA controller. +-- +2.35.3 + diff --git a/patches.suse/doc-Fix-typo-in-request-queue-sysfs-documentation.patch b/patches.suse/doc-Fix-typo-in-request-queue-sysfs-documentation.patch new file mode 100644 index 0000000..ae0d02b --- /dev/null +++ b/patches.suse/doc-Fix-typo-in-request-queue-sysfs-documentation.patch @@ -0,0 +1,37 @@ +From: Damien Le Moal +Date: Wed, 27 Oct 2021 11:22:23 +0900 +Subject: [PATCH] doc: Fix typo in request queue sysfs documentation +Git-commit: 9d824642889823c464847342d6ff530b9eee3241 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Fix a typo (are -> as) in the introduction paragraph of +Documentation/block/queue-sysfs.rst. + +Signed-off-by: Damien Le Moal +Reviewed-by: Hannes Reinecke +Reviewed-by: Martin K. 
Petersen +Reviewed-by: Keith Busch +Link: https://lore.kernel.org/r/20211027022223.183838-6-damien.lemoal@wdc.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + Documentation/block/queue-sysfs.rst | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst +index b6e8983d8eda..e8c74306f70a 100644 +--- a/Documentation/block/queue-sysfs.rst ++++ b/Documentation/block/queue-sysfs.rst +@@ -4,7 +4,7 @@ Queue sysfs files + + This text file will detail the queue files that are located in the sysfs tree + for each block device. Note that stacked devices typically do not export +-any settings, since their queue merely functions are a remapping target. ++any settings, since their queue merely functions as a remapping target. + These files are the ones found in the /sys/block/xxx/queue/ directory. + + Files denoted with a RO postfix are readonly and the RW postfix means +-- +2.35.3 + diff --git a/patches.suse/doc-document-sysfs-queue-independent_access_ranges-a.patch b/patches.suse/doc-document-sysfs-queue-independent_access_ranges-a.patch new file mode 100644 index 0000000..fe639cc --- /dev/null +++ b/patches.suse/doc-document-sysfs-queue-independent_access_ranges-a.patch @@ -0,0 +1,66 @@ +From: Damien Le Moal +Date: Wed, 27 Oct 2021 11:22:22 +0900 +Subject: [PATCH] doc: document sysfs queue/independent_access_ranges + attributes +Git-commit: 6b3bae2324d2ecaa404ceab869018011b7ef6a90 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Update the file Documentation/block/queue-sysfs.rst to add a description +of a device queue sysfs entries related to independent access ranges +(e.g. concurrent positioning ranges for multi-actuator hard-disks). + +Signed-off-by: Damien Le Moal +Reviewed-by: Hannes Reinecke +Reviewed-by: Martin K. Petersen +Reviewed-by: Keith Busch +Link: https://lore.kernel.org/r/20211027022223.183838-5-damien.lemoal@wdc.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + Documentation/block/queue-sysfs.rst | 31 +++++++++++++++++++++++++++++ + 1 file changed, 31 insertions(+) + +diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst +index 4dc7f0d499a8..b6e8983d8eda 100644 +--- a/Documentation/block/queue-sysfs.rst ++++ b/Documentation/block/queue-sysfs.rst +@@ -286,4 +286,35 @@ sequential zones of zoned block devices (devices with a zoned attributed + that reports "host-managed" or "host-aware"). This value is always 0 for + regular block devices. + ++independent_access_ranges (RO) ++------------------------------ ++ ++The presence of this sub-directory of the /sys/block/xxx/queue/ directory ++indicates that the device is capable of executing requests targeting ++different sector ranges in parallel. For instance, single LUN multi-actuator ++hard-disks will have an independent_access_ranges directory if the device ++correctly advertizes the sector ranges of its actuators. ++ ++The independent_access_ranges directory contains one directory per access ++range, with each range described using the sector (RO) attribute file to ++indicate the first sector of the range and the nr_sectors (RO) attribute file ++to indicate the total number of sectors in the range starting from the first ++sector of the range. 
For example, a dual-actuator hard-disk will have the ++following independent_access_ranges entries.:: ++ ++ $ tree /sys/block//queue/independent_access_ranges/ ++ /sys/block//queue/independent_access_ranges/ ++ |-- 0 ++ | |-- nr_sectors ++ | `-- sector ++ `-- 1 ++ |-- nr_sectors ++ `-- sector ++ ++The sector and nr_sectors attributes use 512B sector unit, regardless of ++the actual block size of the device. Independent access ranges do not ++overlap and include all sectors within the device capacity. The access ++ranges are numbered in increasing order of the range start sector, ++that is, the sector attribute of range 0 always has the value 0. ++ + Jens Axboe , February 2009 +-- +2.35.3 + diff --git a/patches.suse/docs-bpf-Update-documentation-for-BTF_KIND_TYPE_TAG-.patch b/patches.suse/docs-bpf-Update-documentation-for-BTF_KIND_TYPE_TAG-.patch new file mode 100644 index 0000000..5c1223b --- /dev/null +++ b/patches.suse/docs-bpf-Update-documentation-for-BTF_KIND_TYPE_TAG-.patch @@ -0,0 +1,54 @@ +From: Yonghong Song +Date: Thu, 11 Nov 2021 17:26:56 -0800 +Subject: docs/bpf: Update documentation for BTF_KIND_TYPE_TAG support +Patch-mainline: v5.17-rc1 +Git-commit: d52f5c639dd8605d2563b77b190e278f615a2b8a +References: jsc#PED-1368 + +Add BTF_KIND_TYPE_TAG documentation in btf.rst. + +Signed-off-by: Yonghong Song +Signed-off-by: Alexei Starovoitov +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211112012656.1509082-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/btf.rst | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/Documentation/bpf/btf.rst ++++ b/Documentation/bpf/btf.rst +@@ -86,6 +86,7 @@ sequentially and type id is assigned to + #define BTF_KIND_DATASEC 15 /* Section */ + #define BTF_KIND_FLOAT 16 /* Floating point */ + #define BTF_KIND_DECL_TAG 17 /* Decl Tag */ ++ #define BTF_KIND_TYPE_TAG 18 /* Type Tag */ + + Note that the type section encodes debug info, not just pure types. + ``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram. +@@ -107,7 +108,7 @@ Each type contains the following common + * "size" tells the size of the type it is describing. + * + * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, +- * FUNC, FUNC_PROTO and DECL_TAG. ++ * FUNC, FUNC_PROTO, DECL_TAG and TYPE_TAG. + * "type" is a type_id referring to another type. + */ + union { +@@ -492,6 +493,16 @@ the attribute is applied to a ``struct`` + a ``func`` argument, and ``btf_decl_tag.component_idx`` should be a + valid index (starting from 0) pointing to a member or an argument. + ++2.2.17 BTF_KIND_TYPE_TAG ++~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++``struct btf_type`` encoding requirement: ++ * ``name_off``: offset to a non-empty string ++ * ``info.kind_flag``: 0 ++ * ``info.kind``: BTF_KIND_TYPE_TAG ++ * ``info.vlen``: 0 ++ * ``type``: the type with ``btf_type_tag`` attribute ++ + 3. BTF Kernel API + ***************** + diff --git a/patches.suse/drbd-use-bdev_nr_sectors-instead-of-open-coding-it.patch b/patches.suse/drbd-use-bdev_nr_sectors-instead-of-open-coding-it.patch new file mode 100644 index 0000000..ee13baf --- /dev/null +++ b/patches.suse/drbd-use-bdev_nr_sectors-instead-of-open-coding-it.patch @@ -0,0 +1,37 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:04 +0200 +Subject: [PATCH] drbd: use bdev_nr_sectors instead of open coding it +Git-commit: da7b392467da82f8b8cbfe69360e3128688c9ddb +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. 
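The open-coded form this series replaces reads the size off the block
device's backing inode and shifts it down to 512-byte sectors, which is
exactly what the helper hides. A minimal before/after sketch (illustrative
only, not part of the patch):

    /* before: open-coded, pokes at bdev->bd_inode internals */
    sector_t capacity = i_size_read(bdev->bd_inode) >> 9;

    /* after: the same value via one self-documenting helper */
    sector_t capacity = bdev_nr_sectors(bdev);

The drbd hunk below keeps its NULL check and only swaps the size read.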
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Reviewed-by: Lee Duncan +Reviewed-by: Chaitanya Kulkarni +Link: https://lore.kernel.org/r/20211018101130.1838532-5-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/drbd/drbd_int.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h +index 6674a0b88341..f27d5b0f9a0b 100644 +--- a/drivers/block/drbd/drbd_int.h ++++ b/drivers/block/drbd/drbd_int.h +@@ -1826,8 +1826,7 @@ static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev) + /* Returns the number of 512 byte sectors of the device */ + static inline sector_t drbd_get_capacity(struct block_device *bdev) + { +- /* return bdev ? get_capacity(bdev->bd_disk) : 0; */ +- return bdev ? i_size_read(bdev->bd_inode) >> 9 : 0; ++ return bdev ? bdev_nr_sectors(bdev) : 0; + } + + /** +-- +2.35.3 + diff --git a/patches.suse/drivers-hv-vmbus-introduce-lock-unlock-_requestor.patch b/patches.suse/drivers-hv-vmbus-introduce-lock-unlock-_requestor.patch index 8184d7b..7823dad 100644 --- a/patches.suse/drivers-hv-vmbus-introduce-lock-unlock-_requestor.patch +++ b/patches.suse/drivers-hv-vmbus-introduce-lock-unlock-_requestor.patch @@ -2,7 +2,7 @@ From b91eaf7267cf7aec0a4e087decf7770dfb694d78 Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Tue, 19 Apr 2022 14:23:24 +0200 Subject: [PATCH] Drivers: hv: vmbus: Introduce {lock,unlock}_requestor() -References: bsc#1204017 +References: bsc#1204017, bsc#1205617 Git-commit: b91eaf7267cf7aec0a4e087decf7770dfb694d78 Patch-mainline: v5.19-rc1 diff --git a/patches.suse/drivers-hv-vmbus-introduce-vmbus_request_addr_match.patch b/patches.suse/drivers-hv-vmbus-introduce-vmbus_request_addr_match.patch index 6d4fefc..deacc10 100644 --- a/patches.suse/drivers-hv-vmbus-introduce-vmbus_request_addr_match.patch +++ b/patches.suse/drivers-hv-vmbus-introduce-vmbus_request_addr_match.patch @@ -2,7 +2,7 @@ From 0aadb6a7bb811554cf39318b5d18e8ec50dd9f02 Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Tue, 19 Apr 2022 14:23:23 +0200 Subject: [PATCH] Drivers: hv: vmbus: Introduce vmbus_request_addr_match() -References: bsc#1204017 +References: bsc#1204017, bsc#1205617 Git-commit: 0aadb6a7bb811554cf39318b5d18e8ec50dd9f02 Patch-mainline: v5.19-rc1 diff --git a/patches.suse/drivers-hv-vmbus-introduce-vmbus_sendpacket_getid.patch b/patches.suse/drivers-hv-vmbus-introduce-vmbus_sendpacket_getid.patch index 07327c5..bd99ee1 100644 --- a/patches.suse/drivers-hv-vmbus-introduce-vmbus_sendpacket_getid.patch +++ b/patches.suse/drivers-hv-vmbus-introduce-vmbus_sendpacket_getid.patch @@ -2,7 +2,7 @@ From b03afa57c65e1e045e02df49777e953742745f4c Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Tue, 19 Apr 2022 14:23:22 +0200 Subject: [PATCH] Drivers: hv: vmbus: Introduce vmbus_sendpacket_getid() -References: bsc#1204017 +References: bsc#1204017, bsc#1205617 Git-commit: b03afa57c65e1e045e02df49777e953742745f4c Patch-mainline: v5.19-rc1 diff --git a/patches.suse/drivers-s390-char-Add-Ultravisor-io-device b/patches.suse/drivers-s390-char-Add-Ultravisor-io-device new file mode 100644 index 0000000..d6be58d --- /dev/null +++ b/patches.suse/drivers-s390-char-Add-Ultravisor-io-device @@ -0,0 +1,445 @@ +From: Steffen Eiden +Date: Mon, 16 May 2022 11:33:35 +0000 +Subject: drivers/s390/char: Add Ultravisor io device +Git-commit: 4689752c79fa30e91b49b39a9fba93c4d1f3e20c +Patch-mainline: v5.19-rc1 +References: jsc#PED-589 + 
+This patch adds a new miscdevice to expose some Ultravisor functions +to userspace. Userspace can send IOCTLs to the uvdevice that will then +emit a corresponding Ultravisor Call and hands the result over to +userspace. The uvdevice is available if the Ultravisor Call facility is +present. +Userspace can call the Retrieve Attestation Measurement +Ultravisor Call using IOCTLs on the uvdevice. + +The uvdevice will do some sanity checks first. +Then, copy the request data to kernel space, build the UVCB, +perform the UV call, and copy the result back to userspace. + +Signed-off-by: Steffen Eiden +Reviewed-by: Janosch Frank +Reviewed-by: Claudio Imbrenda +Link: https://lore.kernel.org/kvm/20220516113335.338212-1-seiden@linux.ibm.com/ +Message-Id: <20220516113335.338212-1-seiden@linux.ibm.com> +Signed-off-by: Janosch Frank (whitespace and tristate fixes, pick) +Acked-by: Petr Tesarik +--- + MAINTAINERS | 2 + arch/s390/include/asm/uv.h | 23 ++- + arch/s390/include/uapi/asm/uvdevice.h | 51 ++++++ + drivers/s390/char/Kconfig | 10 + + drivers/s390/char/Makefile | 1 + drivers/s390/char/uvdevice.c | 257 ++++++++++++++++++++++++++++++++++ + 6 files changed, 343 insertions(+), 1 deletion(-) + +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -10248,9 +10248,11 @@ F: Documentation/virt/kvm/s390* + F: arch/s390/include/asm/gmap.h + F: arch/s390/include/asm/kvm* + F: arch/s390/include/uapi/asm/kvm* ++F: arch/s390/include/uapi/asm/uvdevice.h + F: arch/s390/kernel/uv.c + F: arch/s390/kvm/ + F: arch/s390/mm/gmap.c ++F: drivers/s390/char/uvdevice.c + F: tools/testing/selftests/kvm/*/s390x/ + F: tools/testing/selftests/kvm/s390x/ + +--- a/arch/s390/include/asm/uv.h ++++ b/arch/s390/include/asm/uv.h +@@ -2,7 +2,7 @@ + /* + * Ultravisor Interfaces + * +- * Copyright IBM Corp. 2019 ++ * Copyright IBM Corp. 2019, 2022 + * + * Author(s): + * Vasily Gorbik +@@ -52,6 +52,7 @@ + #define UVC_CMD_UNPIN_PAGE_SHARED 0x0342 + #define UVC_CMD_SET_SHARED_ACCESS 0x1000 + #define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001 ++#define UVC_CMD_RETR_ATTEST 0x1020 + + /* Bits in installed uv calls */ + enum uv_cmds_inst { +@@ -76,6 +77,7 @@ enum uv_cmds_inst { + BIT_UVC_CMD_UNSHARE_ALL = 20, + BIT_UVC_CMD_PIN_PAGE_SHARED = 21, + BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22, ++ BIT_UVC_CMD_RETR_ATTEST = 28, + }; + + enum uv_feat_ind { +@@ -218,6 +220,25 @@ struct uv_cb_share { + u64 reserved28; + } __packed __aligned(8); + ++/* Retrieve Attestation Measurement */ ++struct uv_cb_attest { ++ struct uv_cb_header header; /* 0x0000 */ ++ u64 reserved08[2]; /* 0x0008 */ ++ u64 arcb_addr; /* 0x0018 */ ++ u64 cont_token; /* 0x0020 */ ++ u8 reserved28[6]; /* 0x0028 */ ++ u16 user_data_len; /* 0x002e */ ++ u8 user_data[256]; /* 0x0030 */ ++ u32 reserved130[3]; /* 0x0130 */ ++ u32 meas_len; /* 0x013c */ ++ u64 meas_addr; /* 0x0140 */ ++ u8 config_uid[16]; /* 0x0148 */ ++ u32 reserved158; /* 0x0158 */ ++ u32 add_data_len; /* 0x015c */ ++ u64 add_data_addr; /* 0x0160 */ ++ u64 reserved168[4]; /* 0x0168 */ ++} __packed __aligned(8); ++ + static inline int __uv_call(unsigned long r1, unsigned long r2) + { + int cc; +--- /dev/null ++++ b/arch/s390/include/uapi/asm/uvdevice.h +@@ -0,0 +1,51 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * Copyright IBM Corp. 
2022 ++ * Author(s): Steffen Eiden ++ */ ++#ifndef __S390_ASM_UVDEVICE_H ++#define __S390_ASM_UVDEVICE_H ++ ++#include ++ ++struct uvio_ioctl_cb { ++ __u32 flags; ++ __u16 uv_rc; /* UV header rc value */ ++ __u16 uv_rrc; /* UV header rrc value */ ++ __u64 argument_addr; /* Userspace address of uvio argument */ ++ __u32 argument_len; ++ __u8 reserved14[0x40 - 0x14]; /* must be zero */ ++}; ++ ++#define UVIO_ATT_USER_DATA_LEN 0x100 ++#define UVIO_ATT_UID_LEN 0x10 ++struct uvio_attest { ++ __u64 arcb_addr; /* 0x0000 */ ++ __u64 meas_addr; /* 0x0008 */ ++ __u64 add_data_addr; /* 0x0010 */ ++ __u8 user_data[UVIO_ATT_USER_DATA_LEN]; /* 0x0018 */ ++ __u8 config_uid[UVIO_ATT_UID_LEN]; /* 0x0118 */ ++ __u32 arcb_len; /* 0x0128 */ ++ __u32 meas_len; /* 0x012c */ ++ __u32 add_data_len; /* 0x0130 */ ++ __u16 user_data_len; /* 0x0134 */ ++ __u16 reserved136; /* 0x0136 */ ++}; ++ ++/* ++ * The following max values define an upper length for the IOCTL in/out buffers. ++ * However, they do not represent the maximum the Ultravisor allows which is ++ * often way smaller. By allowing larger buffer sizes we hopefully do not need ++ * to update the code with every machine update. It is therefore possible for ++ * userspace to request more memory than actually used by kernel/UV. ++ */ ++#define UVIO_ATT_ARCB_MAX_LEN 0x100000 ++#define UVIO_ATT_MEASUREMENT_MAX_LEN 0x8000 ++#define UVIO_ATT_ADDITIONAL_MAX_LEN 0x8000 ++ ++#define UVIO_DEVICE_NAME "uv" ++#define UVIO_TYPE_UVC 'u' ++ ++#define UVIO_IOCTL_ATT _IOWR(UVIO_TYPE_UVC, 0x01, struct uvio_ioctl_cb) ++ ++#endif /* __S390_ASM_UVDEVICE_H */ +--- a/drivers/s390/char/Kconfig ++++ b/drivers/s390/char/Kconfig +@@ -100,6 +100,16 @@ config SCLP_OFB + This option enables the Open-for-Business interface to the s390 + Service Element. + ++config S390_UV_UAPI ++ def_tristate m ++ prompt "Ultravisor userspace API" ++ help ++ Selecting exposes parts of the UV interface to userspace ++ by providing a misc character device at /dev/uv. ++ Using IOCTLs one can interact with the UV. ++ The device is only available if the Ultravisor ++ Facility (158) is present. ++ + config S390_TAPE + def_tristate m + prompt "S/390 tape device support" +--- a/drivers/s390/char/Makefile ++++ b/drivers/s390/char/Makefile +@@ -48,6 +48,7 @@ obj-$(CONFIG_MONREADER) += monreader.o + obj-$(CONFIG_MONWRITER) += monwriter.o + obj-$(CONFIG_S390_VMUR) += vmur.o + obj-$(CONFIG_CRASH_DUMP) += sclp_sdias.o zcore.o ++obj-$(CONFIG_S390_UV_UAPI) += uvdevice.o + + hmcdrv-objs := hmcdrv_mod.o hmcdrv_dev.o hmcdrv_ftp.o hmcdrv_cache.o diag_ftp.o sclp_ftp.o + obj-$(CONFIG_HMC_DRV) += hmcdrv.o +--- /dev/null ++++ b/drivers/s390/char/uvdevice.c +@@ -0,0 +1,257 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright IBM Corp. 2022 ++ * Author(s): Steffen Eiden ++ * ++ * This file provides a Linux misc device to give userspace access to some ++ * Ultravisor (UV) functions. The device only accepts IOCTLs and will only ++ * be present if the Ultravisor facility (158) is present. ++ * ++ * When userspace sends a valid IOCTL uvdevice will copy the input data to ++ * kernel space, do some basic validity checks to avoid kernel/system ++ * corruption. Any other check that the Ultravisor does will not be done by ++ * the uvdevice to keep changes minimal when adding new functionalities ++ * to existing UV-calls. ++ * After the checks uvdevice builds a corresponding ++ * Ultravisor Call Control Block, and sends the request to the Ultravisor. ++ * Then, it copies the response, including the return codes, back to userspace. 
++ * It is the responsibility of the userspace to check for any error issued ++ * by UV and to interpret the UV response. The uvdevice acts as a communication ++ * channel for userspace to the Ultravisor. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++static int uvio_build_uvcb_attest(struct uv_cb_attest *uvcb_attest, u8 *arcb, ++ u8 *meas, u8 *add_data, struct uvio_attest *uvio_attest) ++{ ++ void __user *user_buf_arcb = (void __user *)uvio_attest->arcb_addr; ++ ++ if (copy_from_user(arcb, user_buf_arcb, uvio_attest->arcb_len)) ++ return -EFAULT; ++ ++ uvcb_attest->header.len = sizeof(*uvcb_attest); ++ uvcb_attest->header.cmd = UVC_CMD_RETR_ATTEST; ++ uvcb_attest->arcb_addr = (u64)arcb; ++ uvcb_attest->cont_token = 0; ++ uvcb_attest->user_data_len = uvio_attest->user_data_len; ++ memcpy(uvcb_attest->user_data, uvio_attest->user_data, sizeof(uvcb_attest->user_data)); ++ uvcb_attest->meas_len = uvio_attest->meas_len; ++ uvcb_attest->meas_addr = (u64)meas; ++ uvcb_attest->add_data_len = uvio_attest->add_data_len; ++ uvcb_attest->add_data_addr = (u64)add_data; ++ ++ return 0; ++} ++ ++static int uvio_copy_attest_result_to_user(struct uv_cb_attest *uvcb_attest, ++ struct uvio_ioctl_cb *uv_ioctl, ++ u8 *measurement, u8 *add_data, ++ struct uvio_attest *uvio_attest) ++{ ++ struct uvio_attest __user *user_uvio_attest = (void __user *)uv_ioctl->argument_addr; ++ void __user *user_buf_add = (void __user *)uvio_attest->add_data_addr; ++ void __user *user_buf_meas = (void __user *)uvio_attest->meas_addr; ++ void __user *user_buf_uid = &user_uvio_attest->config_uid; ++ ++ if (copy_to_user(user_buf_meas, measurement, uvio_attest->meas_len)) ++ return -EFAULT; ++ if (add_data && copy_to_user(user_buf_add, add_data, uvio_attest->add_data_len)) ++ return -EFAULT; ++ if (copy_to_user(user_buf_uid, uvcb_attest->config_uid, sizeof(uvcb_attest->config_uid))) ++ return -EFAULT; ++ return 0; ++} ++ ++static int get_uvio_attest(struct uvio_ioctl_cb *uv_ioctl, struct uvio_attest *uvio_attest) ++{ ++ u8 __user *user_arg_buf = (u8 __user *)uv_ioctl->argument_addr; ++ ++ if (copy_from_user(uvio_attest, user_arg_buf, sizeof(*uvio_attest))) ++ return -EFAULT; ++ ++ if (uvio_attest->arcb_len > UVIO_ATT_ARCB_MAX_LEN) ++ return -EINVAL; ++ if (uvio_attest->arcb_len == 0) ++ return -EINVAL; ++ if (uvio_attest->meas_len > UVIO_ATT_MEASUREMENT_MAX_LEN) ++ return -EINVAL; ++ if (uvio_attest->meas_len == 0) ++ return -EINVAL; ++ if (uvio_attest->add_data_len > UVIO_ATT_ADDITIONAL_MAX_LEN) ++ return -EINVAL; ++ if (uvio_attest->reserved136) ++ return -EINVAL; ++ return 0; ++} ++ ++/** ++ * uvio_attestation() - Perform a Retrieve Attestation Measurement UVC. ++ * ++ * @uv_ioctl: ioctl control block ++ * ++ * uvio_attestation() does a Retrieve Attestation Measurement Ultravisor Call. ++ * It verifies that the given userspace addresses are valid and request sizes ++ * are sane. Every other check is made by the Ultravisor (UV) and won't result ++ * in a negative return value. It copies the input to kernelspace, builds the ++ * request, sends the UV-call, and copies the result to userspace. ++ * ++ * The Attestation Request has two input and two outputs. ++ * ARCB and User Data are inputs for the UV generated by userspace. ++ * Measurement and Additional Data are outputs for userspace generated by UV. 
++ * ++ * The Attestation Request Control Block (ARCB) is a cryptographically verified ++ * and secured request to UV and User Data is some plaintext data which is ++ * going to be included in the Attestation Measurement calculation. ++ * ++ * Measurement is a cryptographic measurement of the callers properties, ++ * optional data configured by the ARCB and the user data. If specified by the ++ * ARCB, UV will add some Additional Data to the measurement calculation. ++ * This Additional Data is then returned as well. ++ * ++ * If the Retrieve Attestation Measurement UV facility is not present, ++ * UV will return invalid command rc. This won't be fenced in the driver ++ * and does not result in a negative return value. ++ * ++ * Context: might sleep ++ * ++ * Return: 0 on success or a negative error code on error. ++ */ ++static int uvio_attestation(struct uvio_ioctl_cb *uv_ioctl) ++{ ++ struct uv_cb_attest *uvcb_attest = NULL; ++ struct uvio_attest *uvio_attest = NULL; ++ u8 *measurement = NULL; ++ u8 *add_data = NULL; ++ u8 *arcb = NULL; ++ int ret; ++ ++ ret = -EINVAL; ++ if (uv_ioctl->argument_len != sizeof(*uvio_attest)) ++ goto out; ++ ++ ret = -ENOMEM; ++ uvio_attest = kzalloc(sizeof(*uvio_attest), GFP_KERNEL); ++ if (!uvio_attest) ++ goto out; ++ ++ ret = get_uvio_attest(uv_ioctl, uvio_attest); ++ if (ret) ++ goto out; ++ ++ ret = -ENOMEM; ++ arcb = kvzalloc(uvio_attest->arcb_len, GFP_KERNEL); ++ measurement = kvzalloc(uvio_attest->meas_len, GFP_KERNEL); ++ if (!arcb || !measurement) ++ goto out; ++ ++ if (uvio_attest->add_data_len) { ++ add_data = kvzalloc(uvio_attest->add_data_len, GFP_KERNEL); ++ if (!add_data) ++ goto out; ++ } ++ ++ uvcb_attest = kzalloc(sizeof(*uvcb_attest), GFP_KERNEL); ++ if (!uvcb_attest) ++ goto out; ++ ++ ret = uvio_build_uvcb_attest(uvcb_attest, arcb, measurement, add_data, uvio_attest); ++ if (ret) ++ goto out; ++ ++ uv_call_sched(0, (u64)uvcb_attest); ++ ++ uv_ioctl->uv_rc = uvcb_attest->header.rc; ++ uv_ioctl->uv_rrc = uvcb_attest->header.rrc; ++ ++ ret = uvio_copy_attest_result_to_user(uvcb_attest, uv_ioctl, measurement, add_data, ++ uvio_attest); ++out: ++ kvfree(arcb); ++ kvfree(measurement); ++ kvfree(add_data); ++ kfree(uvio_attest); ++ kfree(uvcb_attest); ++ return ret; ++} ++ ++static int uvio_copy_and_check_ioctl(struct uvio_ioctl_cb *ioctl, void __user *argp) ++{ ++ if (copy_from_user(ioctl, argp, sizeof(*ioctl))) ++ return -EFAULT; ++ if (ioctl->flags != 0) ++ return -EINVAL; ++ if (memchr_inv(ioctl->reserved14, 0, sizeof(ioctl->reserved14))) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++/* ++ * IOCTL entry point for the Ultravisor device. 
++ */ ++static long uvio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ++{ ++ void __user *argp = (void __user *)arg; ++ struct uvio_ioctl_cb uv_ioctl = { }; ++ long ret; ++ ++ switch (cmd) { ++ case UVIO_IOCTL_ATT: ++ ret = uvio_copy_and_check_ioctl(&uv_ioctl, argp); ++ if (ret) ++ return ret; ++ ret = uvio_attestation(&uv_ioctl); ++ break; ++ default: ++ ret = -ENOIOCTLCMD; ++ break; ++ } ++ if (ret) ++ return ret; ++ ++ if (copy_to_user(argp, &uv_ioctl, sizeof(uv_ioctl))) ++ ret = -EFAULT; ++ ++ return ret; ++} ++ ++static const struct file_operations uvio_dev_fops = { ++ .owner = THIS_MODULE, ++ .unlocked_ioctl = uvio_ioctl, ++ .llseek = no_llseek, ++}; ++ ++static struct miscdevice uvio_dev_miscdev = { ++ .minor = MISC_DYNAMIC_MINOR, ++ .name = UVIO_DEVICE_NAME, ++ .fops = &uvio_dev_fops, ++}; ++ ++static void __exit uvio_dev_exit(void) ++{ ++ misc_deregister(&uvio_dev_miscdev); ++} ++ ++static int __init uvio_dev_init(void) ++{ ++ if (!test_facility(158)) ++ return -ENXIO; ++ return misc_register(&uvio_dev_miscdev); ++} ++ ++module_init(uvio_dev_init); ++module_exit(uvio_dev_exit); ++ ++MODULE_AUTHOR("IBM Corporation"); ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("Ultravisor UAPI driver"); diff --git a/patches.suse/efi-x86-Set-the-NX-compatibility-flag-in-the-PE-head.patch b/patches.suse/efi-x86-Set-the-NX-compatibility-flag-in-the-PE-head.patch new file mode 100644 index 0000000..f97e31b --- /dev/null +++ b/patches.suse/efi-x86-Set-the-NX-compatibility-flag-in-the-PE-head.patch @@ -0,0 +1,39 @@ +From: Peter Jones +Date: Tue, 29 Mar 2022 14:47:43 -0400 +Subject: efi: x86: Set the NX-compatibility flag in the PE header +Patch-mainline: v5.19-rc1 +Git-commit: 24b72bb12e84c75e297a5a81f24b921d7a011575 +References: bsc#1205588 + +Following Baskov Evgeniy's "Handle UEFI NX-restricted page tables" +patches, it's safe to set this compatibility flag to let loaders know +they don't need to make special accommodations for kernel to load if +pre-boot NX is enabled. + +Signed-off-by: Peter Jones +Link: https://lore.kernel.org/all/20220329184743.798513-1-pjones@redhat.com/ +Signed-off-by: Ard Biesheuvel +Acked-by: Lee, Chun-Yi +--- + arch/x86/boot/header.S | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S +index 6dbd7e9f74c9..0352e4589efa 100644 +--- a/arch/x86/boot/header.S ++++ b/arch/x86/boot/header.S +@@ -163,7 +163,11 @@ extra_header_fields: + .long 0x200 # SizeOfHeaders + .long 0 # CheckSum + .word IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application) ++#ifdef CONFIG_DXE_MEM_ATTRIBUTES ++ .word IMAGE_DLL_CHARACTERISTICS_NX_COMPAT # DllCharacteristics ++#else + .word 0 # DllCharacteristics ++#endif + #ifdef CONFIG_X86_32 + .long 0 # SizeOfStackReserve + .long 0 # SizeOfStackCommit +-- +2.35.3 + diff --git a/patches.suse/ext4-use-sb_bdev_nr_blocks.patch b/patches.suse/ext4-use-sb_bdev_nr_blocks.patch new file mode 100644 index 0000000..541ac29 --- /dev/null +++ b/patches.suse/ext4-use-sb_bdev_nr_blocks.patch @@ -0,0 +1,37 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:26 +0200 +Subject: [PATCH] ext4: use sb_bdev_nr_blocks +Git-commit: 5513b241b2ef903b36c2ff74976618885fc5a318 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the sb_bdev_nr_blocks helper instead of open coding it. 
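The helper's definition is not part of this hunk; assuming it follows the
other bdev size helpers in this series, its shape is roughly:

    /* sketch: device size expressed in filesystem-sized blocks */
    static inline sector_t sb_bdev_nr_blocks(struct super_block *sb)
    {
            return bdev_nr_sectors(sb->s_bdev) >>
                    (sb->s_blocksize_bits - SECTOR_SHIFT);
    }

which is the same arithmetic as the open-coded
i_size >> sb->s_blocksize_bits read that the hunk below removes.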
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Reviewed-by: Jan Kara +Reviewed-by: Chaitanya Kulkarni +Acked-by: Theodore Ts'o +Link: https://lore.kernel.org/r/20211018101130.1838532-27-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/ext4/super.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 88d5d274a868..29f38251013e 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -4474,7 +4474,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + goto cantfind_ext4; + + /* check blocks count against device size */ +- blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; ++ blocks_count = sb_bdev_nr_blocks(sb); + if (blocks_count && ext4_blocks_count(es) > blocks_count) { + ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " + "exceeds size of device (%llu blocks)", +-- +2.35.3 + diff --git a/patches.suse/fat-use-bdev_nr_sectors-instead-of-open-coding-it.patch b/patches.suse/fat-use-bdev_nr_sectors-instead-of-open-coding-it.patch new file mode 100644 index 0000000..dbf1b63 --- /dev/null +++ b/patches.suse/fat-use-bdev_nr_sectors-instead-of-open-coding-it.patch @@ -0,0 +1,41 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:14 +0200 +Subject: [PATCH] fat: use bdev_nr_sectors instead of open coding it +Git-commit: 9e48243b6506be0970d04fe2f015b1b3520ef9f3 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Link: https://lore.kernel.org/r/20211018101130.1838532-15-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/fat/inode.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/fs/fat/inode.c b/fs/fat/inode.c +index de0c9b013a85..9f3cd03668ad 100644 +--- a/fs/fat/inode.c ++++ b/fs/fat/inode.c +@@ -1536,14 +1536,11 @@ static int fat_read_static_bpb(struct super_block *sb, + struct fat_bios_param_block *bpb) + { + static const char *notdos1x = "This doesn't look like a DOS 1.x volume"; +- ++ sector_t bd_sects = bdev_nr_sectors(sb->s_bdev); + struct fat_floppy_defaults *fdefaults = NULL; + int error = -EINVAL; +- sector_t bd_sects; + unsigned i; + +- bd_sects = i_size_read(sb->s_bdev->bd_inode) / SECTOR_SIZE; +- + /* 16-bit DOS 1.x reliably wrote bootstrap short-jmp code */ + if (b->ignored[0] != 0xeb || b->ignored[2] != 0x90) { + if (!silent) +-- +2.35.3 + diff --git a/patches.suse/floppy-add-error-handling-support-for-add_disk.patch b/patches.suse/floppy-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..d53d00a --- /dev/null +++ b/patches.suse/floppy-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,39 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:02:53 -0700 +Subject: [PATCH] floppy: add error handling support for add_disk() +Git-commit: 47d34aa2d211e9cef61dd85b7b0011c9f61dd0ae +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. 
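The conversion pattern is the same across this series: capture the return
value and unwind on failure. A sketch, with the parent device and the
unwind label standing in for per-driver specifics:

    err = device_add_disk(parent, disk, NULL);
    if (err)
            goto out_remove_drives;  /* undo everything set up so far */

The floppy hunk below applies exactly this shape for each drive.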
+ +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20210927220302.1073499-6-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/floppy.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c +index acde3cb63ef4..3873e789478e 100644 +--- a/drivers/block/floppy.c ++++ b/drivers/block/floppy.c +@@ -4697,8 +4697,10 @@ static int __init do_floppy_init(void) + + registered[drive] = true; + +- device_add_disk(&floppy_device[drive].dev, disks[drive][0], +- NULL); ++ err = device_add_disk(&floppy_device[drive].dev, ++ disks[drive][0], NULL); ++ if (err) ++ goto out_remove_drives; + } + + return 0; +-- +2.35.3 + diff --git a/patches.suse/floppy-fix-add_disk-assumption-on-exit-due-to-new-de.patch b/patches.suse/floppy-fix-add_disk-assumption-on-exit-due-to-new-de.patch new file mode 100644 index 0000000..181b34f --- /dev/null +++ b/patches.suse/floppy-fix-add_disk-assumption-on-exit-due-to-new-de.patch @@ -0,0 +1,73 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:02:50 -0700 +Subject: [PATCH] floppy: fix add_disk() assumption on exit due to new +Git-commit: 2598a2bb357d64baaa94368133ddbc900b9eb246 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + developments + +After the patch titled "floppy: use blk_mq_alloc_disk and +blk_cleanup_disk" the floppy driver was modified to allocate +the blk_mq_alloc_disk() which allocates the disk with the +queue. This is further clarified later with the patch titled +"block: remove alloc_disk and alloc_disk_node". This clarifies +that: + + Most drivers should use and have been converted to use + blk_alloc_disk and blk_mq_alloc_disk. Only the scsi + ULPs and dasd still allocate a disk separately from the + request_queue so don't bother with convenience macros for + something that should not see significant new users and + remove these wrappers. + +And then we have the patch titled, "block: hold a request_queue +reference for the lifetime of struct gendisk" which ensures +that a queue is *always* present for sure during the entire +lifetime of a disk. + +In the floppy driver's case then the disk always comes with the +queue. So even if even if the queue was cleaned up on exit, putting +the disk *is* still required, and likewise, blk_cleanup_queue() on +a null queue should not happen now as disk->queue is valid from +disk allocation time on. + +Automatic backport code scrapers should hopefully not cherry pick +this patch as a stable fix candidate without full due dilligence to +ensure all the work done on the block layer to make this happen is +merged first. + +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20210927220302.1073499-3-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/floppy.c | 13 ------------- + 1 file changed, 13 deletions(-) + +diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c +index 6288ce888414..2ee4d3e7ea2d 100644 +--- a/drivers/block/floppy.c ++++ b/drivers/block/floppy.c +@@ -4954,19 +4954,6 @@ static void __exit floppy_module_exit(void) + blk_cleanup_queue(disks[drive][i]->queue); + } + blk_mq_free_tag_set(&tag_sets[drive]); +- +- /* +- * These disks have not called add_disk(). Don't put down +- * queue reference in put_disk(). 
+- */ +- if (!(allowed_drive_mask & (1 << drive)) || +- fdc_state[FDC(drive)].version == FDC_NONE) { +- for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { +- if (disks[drive][i]) +- disks[drive][i]->queue = NULL; +- } +- } +- + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { + if (disks[drive][i]) + put_disk(disks[drive][i]); +-- +2.35.3 + diff --git a/patches.suse/floppy-use-blk_cleanup_disk.patch b/patches.suse/floppy-use-blk_cleanup_disk.patch new file mode 100644 index 0000000..3e79b21 --- /dev/null +++ b/patches.suse/floppy-use-blk_cleanup_disk.patch @@ -0,0 +1,41 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:02:51 -0700 +Subject: [PATCH] floppy: use blk_cleanup_disk() +Git-commit: 3776339ae7acaf9590c668e86f45005fc9aff014 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the blk_cleanup_queue() followed by put_disk() can be +replaced with blk_cleanup_disk(). No need for two separate +loops. + +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20210927220302.1073499-4-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/floppy.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c +index 2ee4d3e7ea2d..74996728fc24 100644 +--- a/drivers/block/floppy.c ++++ b/drivers/block/floppy.c +@@ -4951,13 +4951,9 @@ static void __exit floppy_module_exit(void) + } + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { + if (disks[drive][i]) +- blk_cleanup_queue(disks[drive][i]->queue); ++ blk_cleanup_disk(disks[drive][i]); + } + blk_mq_free_tag_set(&tag_sets[drive]); +- for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { +- if (disks[drive][i]) +- put_disk(disks[drive][i]); +- } + } + + cancel_delayed_work_sync(&fd_timeout); +-- +2.35.3 + diff --git a/patches.suse/fs-mark-the-iomap-argument-to-__block_write_begin_in.patch b/patches.suse/fs-mark-the-iomap-argument-to-__block_write_begin_in.patch new file mode 100644 index 0000000..47b43c9 --- /dev/null +++ b/patches.suse/fs-mark-the-iomap-argument-to-__block_write_begin_in.patch @@ -0,0 +1,59 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:05 -0700 +Subject: [PATCH] fs: mark the iomap argument to __block_write_begin_int const +Git-commit: 6d49cc8545e9e9e9e5a14e75fd044f049bd6077e +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +__block_write_begin_int never modifies the passed in iomap, so mark it +const. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. 
Wong +Acked-by: Hannes Reinecke +--- + fs/buffer.c | 4 ++-- + fs/internal.h | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/fs/buffer.c b/fs/buffer.c +index 6290c3afdba4..bd6a9e9fbd64 100644 +--- a/fs/buffer.c ++++ b/fs/buffer.c +@@ -1912,7 +1912,7 @@ EXPORT_SYMBOL(page_zero_new_buffers); + + static void + iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, +- struct iomap *iomap) ++ const struct iomap *iomap) + { + loff_t offset = block << inode->i_blkbits; + +@@ -1966,7 +1966,7 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, + } + + int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, +- get_block_t *get_block, struct iomap *iomap) ++ get_block_t *get_block, const struct iomap *iomap) + { + unsigned from = pos & (PAGE_SIZE - 1); + unsigned to = from + len; +diff --git a/fs/internal.h b/fs/internal.h +index 82e8eb32ff3d..54c2928d39ec 100644 +--- a/fs/internal.h ++++ b/fs/internal.h +@@ -48,8 +48,8 @@ static inline int emergency_thaw_bdev(struct super_block *sb) + /* + * buffer.c + */ +-extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, +- get_block_t *get_block, struct iomap *iomap); ++int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, ++ get_block_t *get_block, const struct iomap *iomap); + + /* + * char_dev.c +-- +2.35.3 + diff --git a/patches.suse/fs-simplify-init_page_buffers.patch b/patches.suse/fs-simplify-init_page_buffers.patch new file mode 100644 index 0000000..52f1dda --- /dev/null +++ b/patches.suse/fs-simplify-init_page_buffers.patch @@ -0,0 +1,35 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:10 +0200 +Subject: [PATCH] fs: simplify init_page_buffers +Git-commit: bcd1d06350e410f60518f9d778d9cc4674f57158 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +No need to convert from bdev to inode and back. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20211018101130.1838532-11-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/buffer.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/buffer.c b/fs/buffer.c +index 156358977249..46bc589b7a03 100644 +--- a/fs/buffer.c ++++ b/fs/buffer.c +@@ -897,7 +897,7 @@ init_page_buffers(struct page *page, struct block_device *bdev, + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; + int uptodate = PageUptodate(page); +- sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size); ++ sector_t end_block = blkdev_max_block(bdev, size); + + do { + if (!buffer_mapped(bh)) { +-- +2.35.3 + diff --git a/patches.suse/fs-use-bdev_nr_bytes-instead-of-open-coding-it-in-bl.patch b/patches.suse/fs-use-bdev_nr_bytes-instead-of-open-coding-it-in-bl.patch new file mode 100644 index 0000000..f5130c1 --- /dev/null +++ b/patches.suse/fs-use-bdev_nr_bytes-instead-of-open-coding-it-in-bl.patch @@ -0,0 +1,37 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:09 +0200 +Subject: [PATCH] fs: use bdev_nr_bytes instead of open coding it in + blkdev_max_block +Git-commit: b86058f96cc86e415e51bd12cc3786d7cdbd8b47 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. 
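For context, blkdev_max_block() derives the highest addressable block index
from the device size, so only the size read changes. Sketched (not the full
function, which also special-cases a zero-sized device):

    loff_t sz = bdev_nr_bytes(bdev);  /* was: i_size_read(bdev->bd_inode) */
    sector_t last_block = sz >> blksize_bits(size);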
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Reviewed-by: Jan Kara +Reviewed-by: Chaitanya Kulkarni +Link: https://lore.kernel.org/r/20211018101130.1838532-10-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/buffer.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/buffer.c b/fs/buffer.c +index c615387aedca..156358977249 100644 +--- a/fs/buffer.c ++++ b/fs/buffer.c +@@ -878,7 +878,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head) + static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size) + { + sector_t retval = ~((sector_t)0); +- loff_t sz = i_size_read(bdev->bd_inode); ++ loff_t sz = bdev_nr_bytes(bdev); + + if (sz) { + unsigned int sizebits = blksize_bits(size); +-- +2.35.3 + diff --git a/patches.suse/fsdax-mark-the-iomap-argument-to-dax_iomap_sector-as.patch b/patches.suse/fsdax-mark-the-iomap-argument-to-dax_iomap_sector-as.patch new file mode 100644 index 0000000..fa6dd84 --- /dev/null +++ b/patches.suse/fsdax-mark-the-iomap-argument-to-dax_iomap_sector-as.patch @@ -0,0 +1,31 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:05 -0700 +Subject: [PATCH] fsdax: mark the iomap argument to dax_iomap_sector as const +Git-commit: 7e4f4b2d689d959b03cb07dfbdb97b9696cb1076 +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Hannes Reinecke +--- + fs/dax.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/dax.c b/fs/dax.c +index da41f9363568..4d63040fd71f 100644 +--- a/fs/dax.c ++++ b/fs/dax.c +@@ -1005,7 +1005,7 @@ int dax_writeback_mapping_range(struct address_space *mapping, + } + EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); + +-static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos) ++static sector_t dax_iomap_sector(const struct iomap *iomap, loff_t pos) + { + return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9; + } +-- +2.35.3 + diff --git a/patches.suse/ftrace-Fix-null-pointer-dereference-in-ftrace_add_mod.patch b/patches.suse/ftrace-Fix-null-pointer-dereference-in-ftrace_add_mod.patch new file mode 100644 index 0000000..8e3c022 --- /dev/null +++ b/patches.suse/ftrace-Fix-null-pointer-dereference-in-ftrace_add_mod.patch @@ -0,0 +1,56 @@ +From: Xiu Jianfeng +Date: Wed, 16 Nov 2022 09:52:07 +0800 +Subject: ftrace: Fix null pointer dereference in ftrace_add_mod() +Git-commit: 19ba6c8af9382c4c05dc6a0a79af3013b9a35cd0 +Patch-mainline: v6.1-rc6 +References: git-fixes + +The @ftrace_mod is allocated by kzalloc(), so both the members {prev,next} +of @ftrace_mode->list are NULL, it's not a valid state to call list_del(). +If kstrdup() for @ftrace_mod->{func|module} fails, it goes to @out_free +tag and calls free_ftrace_mod() to destroy @ftrace_mod, then list_del() +will write prev->next and next->prev, where null pointer dereference +happens. + +BUG: kernel NULL pointer dereference, address: 0000000000000008 +Oops: 0002 [#1] PREEMPT SMP NOPTI +Call Trace: + + ftrace_mod_callback+0x20d/0x220 + ? do_filp_open+0xd9/0x140 + ftrace_process_regex.isra.51+0xbf/0x130 + ftrace_regex_write.isra.52.part.53+0x6e/0x90 + vfs_write+0xee/0x3a0 + ? __audit_filter_op+0xb1/0x100 + ? auditd_test_task+0x38/0x50 + ksys_write+0xa5/0xe0 + do_syscall_64+0x3a/0x90 + entry_SYSCALL_64_after_hwframe+0x63/0xcd +Kernel panic - not syncing: Fatal exception + +So call INIT_LIST_HEAD() to initialize the list member to fix this issue. 
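The mechanics in miniature, using the struct and field names from
kernel/trace/ftrace.c:

    struct ftrace_mod_load *ftrace_mod;

    ftrace_mod = kzalloc(sizeof(*ftrace_mod), GFP_KERNEL);
    /* Here list.prev == list.next == NULL, so list_del(&ftrace_mod->list)
     * on the error path writes through NULL; the faulting address 0x8 in
     * the oops above is offsetof(struct list_head, prev) off a NULL next.
     * INIT_LIST_HEAD() links the node to itself, which list_del() handles
     * safely even before any list_add(). */
    INIT_LIST_HEAD(&ftrace_mod->list);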
+ +Link: https://lkml.kernel.org/r/20221116015207.30858-1-xiujianfeng@huawei.com + +Cc: stable@vger.kernel.org +Fixes: 673feb9d76ab ("ftrace: Add :mod: caching infrastructure to trace_array") +Signed-off-by: Xiu Jianfeng +Signed-off-by: Steven Rostedt (Google) +Acked-by: Petr Pavlu +--- + kernel/trace/ftrace.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index 56a168121bfc..33236241f236 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -1289,6 +1289,7 @@ static int ftrace_add_mod(struct trace_array *tr, + if (!ftrace_mod) + return -ENOMEM; + ++ INIT_LIST_HEAD(&ftrace_mod->list); + ftrace_mod->func = kstrdup(func, GFP_KERNEL); + ftrace_mod->module = kstrdup(module, GFP_KERNEL); + ftrace_mod->enable = enable; + diff --git a/patches.suse/ftrace-Fix-the-possible-incorrect-kernel-message.patch b/patches.suse/ftrace-Fix-the-possible-incorrect-kernel-message.patch new file mode 100644 index 0000000..9c1cec6 --- /dev/null +++ b/patches.suse/ftrace-Fix-the-possible-incorrect-kernel-message.patch @@ -0,0 +1,37 @@ +From: Wang Wensheng +Date: Wed, 9 Nov 2022 09:44:32 +0000 +Subject: ftrace: Fix the possible incorrect kernel message +Git-commit: 08948caebe93482db1adfd2154eba124f66d161d +Patch-mainline: v6.1-rc6 +References: git-fixes + +If the number of mcount entries is an integer multiple of +ENTRIES_PER_PAGE, the page count showing on the console would be wrong. + +Link: https://lkml.kernel.org/r/20221109094434.84046-2-wangwensheng4@huawei.com + +Cc: +Cc: +Cc: stable@vger.kernel.org +Fixes: 5821e1b74f0d0 ("function tracing: fix wrong pos computing when read buffer has been fulfilled") +Signed-off-by: Wang Wensheng +Signed-off-by: Steven Rostedt (Google) +Acked-by: Petr Pavlu +--- + kernel/trace/ftrace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index 7dc023641bf1..8b13ce2eae70 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -7391,7 +7391,7 @@ void __init ftrace_init(void) + } + + pr_info("ftrace: allocating %ld entries in %ld pages\n", +- count, count / ENTRIES_PER_PAGE + 1); ++ count, DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); + + last_ftrace_enabled = ftrace_enabled = 1; + + diff --git a/patches.suse/ftrace-Fix-use-after-free-for-dynamic-ftrace_ops.patch b/patches.suse/ftrace-Fix-use-after-free-for-dynamic-ftrace_ops.patch new file mode 100644 index 0000000..0f454f7 --- /dev/null +++ b/patches.suse/ftrace-Fix-use-after-free-for-dynamic-ftrace_ops.patch @@ -0,0 +1,140 @@ +From: Li Huafei +Date: Thu, 3 Nov 2022 11:10:10 +0800 +Subject: ftrace: Fix use-after-free for dynamic ftrace_ops +Git-commit: 0e792b89e6800cd9cb4757a76a96f7ef3e8b6294 +Patch-mainline: v6.1-rc4 +References: git-fixes + +KASAN reported a use-after-free with ftrace ops [1]. It was found from +vmcore that perf had registered two ops with the same content +successively, both dynamic. After unregistering the second ops, a +use-after-free occurred. + +In ftrace_shutdown(), when the second ops is unregistered, the +FTRACE_UPDATE_CALLS command is not set because there is another enabled +ops with the same content. Also, both ops are dynamic and the ftrace +callback function is ftrace_ops_list_func, so the +FTRACE_UPDATE_TRACE_FUNC command will not be set. Eventually the value +of 'command' will be 0 and ftrace_shutdown() will skip the rcu +synchronization. + +However, ftrace may be activated. When the ops is released, another CPU +may be accessing the ops. 
Add the missing synchronization to fix this +problem. + +[1] +BUG: KASAN: use-after-free in __ftrace_ops_list_func kernel/trace/ftrace.c:7020 [inline] +BUG: KASAN: use-after-free in ftrace_ops_list_func+0x2b0/0x31c kernel/trace/ftrace.c:7049 +Read of size 8 at addr ffff56551965bbc8 by task syz-executor.2/14468 + +CPU: 1 PID: 14468 Comm: syz-executor.2 Not tainted 5.10.0 #7 +Hardware name: linux,dummy-virt (DT) +Call trace: + dump_backtrace+0x0/0x40c arch/arm64/kernel/stacktrace.c:132 + show_stack+0x30/0x40 arch/arm64/kernel/stacktrace.c:196 + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1b4/0x248 lib/dump_stack.c:118 + print_address_description.constprop.0+0x28/0x48c mm/kasan/report.c:387 + __kasan_report mm/kasan/report.c:547 [inline] + kasan_report+0x118/0x210 mm/kasan/report.c:564 + check_memory_region_inline mm/kasan/generic.c:187 [inline] + __asan_load8+0x98/0xc0 mm/kasan/generic.c:253 + __ftrace_ops_list_func kernel/trace/ftrace.c:7020 [inline] + ftrace_ops_list_func+0x2b0/0x31c kernel/trace/ftrace.c:7049 + ftrace_graph_call+0x0/0x4 + __might_sleep+0x8/0x100 include/linux/perf_event.h:1170 + __might_fault mm/memory.c:5183 [inline] + __might_fault+0x58/0x70 mm/memory.c:5171 + do_strncpy_from_user lib/strncpy_from_user.c:41 [inline] + strncpy_from_user+0x1f4/0x4b0 lib/strncpy_from_user.c:139 + getname_flags+0xb0/0x31c fs/namei.c:149 + getname+0x2c/0x40 fs/namei.c:209 + [...] + +Allocated by task 14445: + kasan_save_stack+0x24/0x50 mm/kasan/common.c:48 + kasan_set_track mm/kasan/common.c:56 [inline] + __kasan_kmalloc mm/kasan/common.c:479 [inline] + __kasan_kmalloc.constprop.0+0x110/0x13c mm/kasan/common.c:449 + kasan_kmalloc+0xc/0x14 mm/kasan/common.c:493 + kmem_cache_alloc_trace+0x440/0x924 mm/slub.c:2950 + kmalloc include/linux/slab.h:563 [inline] + kzalloc include/linux/slab.h:675 [inline] + perf_event_alloc.part.0+0xb4/0x1350 kernel/events/core.c:11230 + perf_event_alloc kernel/events/core.c:11733 [inline] + __do_sys_perf_event_open kernel/events/core.c:11831 [inline] + __se_sys_perf_event_open+0x550/0x15f4 kernel/events/core.c:11723 + __arm64_sys_perf_event_open+0x6c/0x80 kernel/events/core.c:11723 + [...] + +Freed by task 14445: + kasan_save_stack+0x24/0x50 mm/kasan/common.c:48 + kasan_set_track+0x24/0x34 mm/kasan/common.c:56 + kasan_set_free_info+0x20/0x40 mm/kasan/generic.c:358 + __kasan_slab_free.part.0+0x11c/0x1b0 mm/kasan/common.c:437 + __kasan_slab_free mm/kasan/common.c:445 [inline] + kasan_slab_free+0x2c/0x40 mm/kasan/common.c:446 + slab_free_hook mm/slub.c:1569 [inline] + slab_free_freelist_hook mm/slub.c:1608 [inline] + slab_free mm/slub.c:3179 [inline] + kfree+0x12c/0xc10 mm/slub.c:4176 + perf_event_alloc.part.0+0xa0c/0x1350 kernel/events/core.c:11434 + perf_event_alloc kernel/events/core.c:11733 [inline] + __do_sys_perf_event_open kernel/events/core.c:11831 [inline] + __se_sys_perf_event_open+0x550/0x15f4 kernel/events/core.c:11723 + [...] 
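What makes the early free unsafe is the lock-free reader side: the ops list
is walked under RCU-style protection, so another CPU can still be inside the
callback loop when the updater decides it has no FTRACE_UPDATE_* work to do.
A simplified sketch of that reader (macro and callback signature as in
kernel/trace/ftrace.c):

    do_for_each_ftrace_op(op, ftrace_ops_list) {
            op->func(ip, parent_ip, op, fregs);  /* may reference op's data */
    } while_for_each_ftrace_op(op);

Freeing a dynamic ops before the synchronization point lets this loop touch
freed memory, which is exactly the use-after-free KASAN reports above.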
+ +Link: https://lore.kernel.org/linux-trace-kernel/20221103031010.166498-1-lihuafei1@huawei.com + +Fixes: edb096e00724f ("ftrace: Fix memleak when unregistering dynamic ops when tracing disabled") +Cc: stable@vger.kernel.org +Suggested-by: Steven Rostedt +Signed-off-by: Li Huafei +Signed-off-by: Steven Rostedt (Google) +Acked-by: Petr Pavlu +--- + kernel/trace/ftrace.c | 16 +++------------- + 1 file changed, 3 insertions(+), 13 deletions(-) + +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index fbf2543111c0..7dc023641bf1 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -3028,18 +3028,8 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) + command |= FTRACE_UPDATE_TRACE_FUNC; + } + +- if (!command || !ftrace_enabled) { +- /* +- * If these are dynamic or per_cpu ops, they still +- * need their data freed. Since, function tracing is +- * not currently active, we can just free them +- * without synchronizing all CPUs. +- */ +- if (ops->flags & FTRACE_OPS_FL_DYNAMIC) +- goto free_ops; +- +- return 0; +- } ++ if (!command || !ftrace_enabled) ++ goto out; + + /* + * If the ops uses a trampoline, then it needs to be +@@ -3076,6 +3066,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) + removed_ops = NULL; + ops->flags &= ~FTRACE_OPS_FL_REMOVING; + ++out: + /* + * Dynamic ops may be freed, we must make sure that all + * callers are done before leaving this function. +@@ -3103,7 +3094,6 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) + if (IS_ENABLED(CONFIG_PREEMPTION)) + synchronize_rcu_tasks(); + +- free_ops: + ftrace_trampoline_free(ops); + } + + diff --git a/patches.suse/ftrace-Optimize-the-allocation-for-mcount-entries.patch b/patches.suse/ftrace-Optimize-the-allocation-for-mcount-entries.patch new file mode 100644 index 0000000..15e0f07 --- /dev/null +++ b/patches.suse/ftrace-Optimize-the-allocation-for-mcount-entries.patch @@ -0,0 +1,37 @@ +From: Wang Wensheng +Date: Wed, 9 Nov 2022 09:44:33 +0000 +Subject: ftrace: Optimize the allocation for mcount entries +Git-commit: bcea02b096333dc74af987cb9685a4dbdd820840 +Patch-mainline: v6.1-rc6 +References: git-fixes + +If we can't allocate this size, try something smaller with half of the +size. Its order should be decreased by one instead of divided by two. 
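Worked through: order is a power-of-two exponent (an order-n allocation is
2^n pages), so halving the request means decrementing the exponent, not
halving it:

    /* order 3 asks for 2^3 = 8 pages */
    order >>= 1;  /* bug: 3 -> 1, so 8 pages -> 2 pages */
    order--;      /* fix: 3 -> 2, so 8 pages -> 4 pages */

For larger orders the shift overshoots even more, e.g. order 8 >> 1 drops a
256-page request straight to 16 pages.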
+ +Link: https://lkml.kernel.org/r/20221109094434.84046-3-wangwensheng4@huawei.com + +Cc: +Cc: +Cc: stable@vger.kernel.org +Fixes: a79008755497d ("ftrace: Allocate the mcount record pages as groups") +Signed-off-by: Wang Wensheng +Signed-off-by: Steven Rostedt (Google) +Acked-by: Petr Pavlu +--- + kernel/trace/ftrace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index 8b13ce2eae70..56a168121bfc 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -3190,7 +3190,7 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) + /* if we can't allocate this size, try something smaller */ + if (!order) + return -ENOMEM; +- order >>= 1; ++ order--; + goto again; + } + + diff --git a/patches.suse/gup-Turn-fault_in_pages_-readable-writeable-into-fault_in_-readable-writeable.patch b/patches.suse/gup-Turn-fault_in_pages_-readable-writeable-into-fault_in_-readable-writeable.patch index d79a7df..91bde3f 100644 --- a/patches.suse/gup-Turn-fault_in_pages_-readable-writeable-into-fault_in_-readable-writeable.patch +++ b/patches.suse/gup-Turn-fault_in_pages_-readable-writeable-into-fault_in_-readable-writeable.patch @@ -34,21 +34,25 @@ Acked-by: Ivan T. Ivanov mm/gup.c | 72 ++++++++++++++++++++++++++++++++++++ 10 files changed, 91 insertions(+), 73 deletions(-) +diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c +index d89cf802d9aa..6568823cf306 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c -@@ -669,7 +669,8 @@ static void __init kvm_use_magic_page(vo +@@ -669,7 +669,8 @@ static void __init kvm_use_magic_page(void) on_each_cpu(kvm_map_magic_page, &features, 1); /* Quick self-test to see if the mapping works */ -- if (!fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) { +- if (fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) { + if (fault_in_readable((const char __user *)KVM_MAGIC_PAGE, + sizeof(u32))) { kvm_patching_worked = false; return; } +diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c +index 0608581967f0..38c3eae40c14 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c -@@ -1048,7 +1048,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucon +@@ -1048,7 +1048,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, if (new_ctx == NULL) return 0; if (!access_ok(new_ctx, ctx_size) || @@ -57,7 +61,7 @@ Acked-by: Ivan T. Ivanov return -EFAULT; /* -@@ -1237,7 +1237,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct +@@ -1237,7 +1237,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, #endif if (!access_ok(ctx, sizeof(*ctx)) || @@ -66,9 +70,11 @@ Acked-by: Ivan T. Ivanov return -EFAULT; /* +diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c +index 1831bba0582e..9f471b4a11e3 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c -@@ -688,7 +688,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucon +@@ -688,7 +688,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, if (new_ctx == NULL) return 0; if (!access_ok(new_ctx, ctx_size) || @@ -77,6 +83,7 @@ Acked-by: Ivan T. Ivanov return -EFAULT; /* +diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -309,7 +309,7 @@ retry: @@ -88,9 +95,11 @@ Acked-by: Ivan T. 
Ivanov goto retry; return false; } +diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c +index 21909642ee4c..8fbb25913327 100644 --- a/drivers/gpu/drm/armada/armada_gem.c +++ b/drivers/gpu/drm/armada/armada_gem.c -@@ -336,7 +336,7 @@ int armada_gem_pwrite_ioctl(struct drm_d +@@ -336,7 +336,7 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data, struct drm_armada_gem_pwrite *args = data; struct armada_gem_object *dobj; char __user *ptr; @@ -99,7 +108,7 @@ Acked-by: Ivan T. Ivanov DRM_DEBUG_DRIVER("handle %u off %u size %u ptr 0x%llx\n", args->handle, args->offset, args->size, args->ptr); -@@ -349,9 +349,8 @@ int armada_gem_pwrite_ioctl(struct drm_d +@@ -349,9 +349,8 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data, if (!access_ok(ptr, args->size)) return -EFAULT; @@ -111,9 +120,11 @@ Acked-by: Ivan T. Ivanov dobj = armada_gem_object_lookup(file, args->handle); if (dobj == NULL) +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index cc61813213d8..c0739f0af634 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c -@@ -2245,9 +2245,8 @@ static noinline int search_ioctl(struct +@@ -2261,9 +2261,8 @@ static noinline int search_ioctl(struct inode *inode, key.offset = sk->min_offset; while (1) { @@ -125,9 +136,11 @@ Acked-by: Ivan T. Ivanov break; ret = btrfs_search_forward(root, &key, path, sk->min_transid); +diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h +index 62db6b0176b9..9fe94f7a4f7e 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h -@@ -733,61 +733,10 @@ int wait_on_page_private_2_killable(stru +@@ -733,61 +733,10 @@ int wait_on_page_private_2_killable(struct page *page); extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter); /* @@ -192,9 +205,11 @@ Acked-by: Ivan T. Ivanov int add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); +diff --git a/lib/iov_iter.c b/lib/iov_iter.c +index 60b5e6edfbaa..c88908f0f138 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c -@@ -191,7 +191,7 @@ static size_t copy_page_to_iter_iovec(st +@@ -191,7 +191,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b buf = iov->iov_base + skip; copy = min(bytes, iov->iov_len - skip); @@ -203,7 +218,7 @@ Acked-by: Ivan T. Ivanov kaddr = kmap_atomic(page); from = kaddr + offset; -@@ -275,7 +275,7 @@ static size_t copy_page_from_iter_iovec( +@@ -275,7 +275,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t buf = iov->iov_base + skip; copy = min(bytes, iov->iov_len - skip); @@ -212,7 +227,7 @@ Acked-by: Ivan T. Ivanov kaddr = kmap_atomic(page); to = kaddr + offset; -@@ -446,13 +446,11 @@ int iov_iter_fault_in_readable(const str +@@ -446,13 +446,11 @@ int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes) bytes = i->count; for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) { size_t len = min(bytes, p->iov_len - skip); @@ -228,6 +243,8 @@ Acked-by: Ivan T. Ivanov bytes -= len; } } +diff --git a/mm/filemap.c b/mm/filemap.c +index dae481293b5d..ff34f4087f87 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -90,7 +90,7 @@ @@ -239,12 +256,15 @@ Acked-by: Ivan T. 
Ivanov * * bdi->wb.list_lock * sb_lock (fs/fs-writeback.c) +diff --git a/mm/gup.c b/mm/gup.c +index 886d6148d3d0..a7efb027d6cf 100644 --- a/mm/gup.c +++ b/mm/gup.c -@@ -1657,6 +1657,78 @@ finish_or_fault: +@@ -1656,6 +1656,78 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, + } #endif /* !CONFIG_MMU */ - /** ++/** + * fault_in_writeable - fault in userspace address range for writing + * @uaddr: start of address range + * @size: size of address range @@ -316,7 +336,9 @@ Acked-by: Ivan T. Ivanov +} +EXPORT_SYMBOL(fault_in_readable); + -+/** + /** * get_dump_page() - pin user page in memory while writing it to core dump * @addr: user address - * +-- +2.35.3 + diff --git a/patches.suse/hfs-use-bdev_nr_sectors-instead-of-open-coding-it.patch b/patches.suse/hfs-use-bdev_nr_sectors-instead-of-open-coding-it.patch new file mode 100644 index 0000000..7b3132b --- /dev/null +++ b/patches.suse/hfs-use-bdev_nr_sectors-instead-of-open-coding-it.patch @@ -0,0 +1,34 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:15 +0200 +Subject: [PATCH] hfs: use bdev_nr_sectors instead of open coding it +Git-commit: beffd16e683eb9a600f249f9e34673ada1879f8b +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Link: https://lore.kernel.org/r/20211018101130.1838532-16-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/hfs/mdb.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c +index cdf0edeeb278..5beb82652435 100644 +--- a/fs/hfs/mdb.c ++++ b/fs/hfs/mdb.c +@@ -36,7 +36,7 @@ static int hfs_get_last_session(struct super_block *sb, + + /* default values */ + *start = 0; +- *size = i_size_read(sb->s_bdev->bd_inode) >> 9; ++ *size = bdev_nr_sectors(sb->s_bdev); + + if (HFS_SB(sb)->session >= 0) { + struct cdrom_tocentry te; +-- +2.35.3 + diff --git a/patches.suse/hfsplus-use-bdev_nr_sectors-instead-of-open-coding-i.patch b/patches.suse/hfsplus-use-bdev_nr_sectors-instead-of-open-coding-i.patch new file mode 100644 index 0000000..093a645 --- /dev/null +++ b/patches.suse/hfsplus-use-bdev_nr_sectors-instead-of-open-coding-i.patch @@ -0,0 +1,34 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:16 +0200 +Subject: [PATCH] hfsplus: use bdev_nr_sectors instead of open coding it +Git-commit: 78ed961bcee16dc48ca4ab22fb7936957e4dbdf0 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. 
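+
+As in the hfs counterpart, the conversion is purely mechanical: the helper
+returns the device size in 512-byte sectors, which is exactly what the
+open-coded shift computed. Conceptually the helper reduces to the following
+(a sketch of its definition around this series, not a verbatim copy):
+
+	static inline sector_t bdev_nr_sectors(struct block_device *bdev)
+	{
+		return i_size_read(bdev->bd_inode) >> 9;	/* bytes to sectors */
+	}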
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Link: https://lore.kernel.org/r/20211018101130.1838532-17-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/hfsplus/wrapper.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c +index 0350dc7821bf..51ae6f1eb4a5 100644 +--- a/fs/hfsplus/wrapper.c ++++ b/fs/hfsplus/wrapper.c +@@ -131,7 +131,7 @@ static int hfsplus_get_last_session(struct super_block *sb, + + /* default values */ + *start = 0; +- *size = i_size_read(sb->s_bdev->bd_inode) >> 9; ++ *size = bdev_nr_sectors(sb->s_bdev); + + if (HFSPLUS_SB(sb)->session >= 0) { + struct cdrom_tocentry te; +-- +2.35.3 + diff --git a/patches.suse/io_uring-don-t-sleep-when-polling-for-I-O.patch b/patches.suse/io_uring-don-t-sleep-when-polling-for-I-O.patch new file mode 100644 index 0000000..4a34ae0 --- /dev/null +++ b/patches.suse/io_uring-don-t-sleep-when-polling-for-I-O.patch @@ -0,0 +1,65 @@ +From: Christoph Hellwig +Date: Tue, 12 Oct 2021 13:12:20 +0200 +Subject: [PATCH] io_uring: don't sleep when polling for I/O +Git-commit: d729cf9acb9311956c8a37113dcfa0160a2d9665 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +There is no point in sleeping for the expected I/O completion timeout +in the io_uring async polling model as we never poll for a specific +I/O. + +Signed-off-by: Christoph Hellwig +Tested-by: Mark Wunderlich +Link: https://lore.kernel.org/r/20211012111226.760968-11-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/blk-mq.c | 3 ++- + fs/io_uring.c | 2 +- + include/linux/blkdev.h | 2 ++ + 3 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 6609e10657a8..97c24e461d0a 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -4103,7 +4103,8 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags) + if (current->plug) + blk_flush_plug_list(current->plug, false); + +- if (q->poll_nsec != BLK_MQ_POLL_CLASSIC) { ++ if (!(flags & BLK_POLL_NOSLEEP) && ++ q->poll_nsec != BLK_MQ_POLL_CLASSIC) { + if (blk_mq_poll_hybrid(q, cookie)) + return 1; + } +diff --git a/fs/io_uring.c b/fs/io_uring.c +index 541fec2bd49a..c5066146b8de 100644 +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -2457,7 +2457,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, + long min) + { + struct io_kiocb *req, *tmp; +- unsigned int poll_flags = 0; ++ unsigned int poll_flags = BLK_POLL_NOSLEEP; + LIST_HEAD(done); + + /* +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index e177346bc020..2b80c98fc373 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -566,6 +566,8 @@ blk_status_t errno_to_blk_status(int errno); + + /* only poll the hardware once, don't continue until a completion was found */ + #define BLK_POLL_ONESHOT (1 << 0) ++/* do not sleep to wait for the expected completion time */ ++#define BLK_POLL_NOSLEEP (1 << 1) + int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags); + + static inline struct request_queue *bdev_get_queue(struct block_device *bdev) +-- +2.35.3 + diff --git a/patches.suse/io_uring-fix-a-layering-violation-in-io_iopoll_req_i.patch b/patches.suse/io_uring-fix-a-layering-violation-in-io_iopoll_req_i.patch new file mode 100644 index 0000000..e8861dc --- /dev/null +++ b/patches.suse/io_uring-fix-a-layering-violation-in-io_iopoll_req_i.patch @@ -0,0 +1,46 @@ +From: Christoph Hellwig +Date: Tue, 12 Oct 2021 13:12:14 +0200 
+Subject: [PATCH] io_uring: fix a layering violation in io_iopoll_req_issued +Git-commit: 30da1b45b130c70945b033900f45c9d61d6f3b4a +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +syscall-level code can't just poke into the details of the poll cookie, +which is private information of the block layer. + +Signed-off-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20211012111226.760968-5-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/io_uring.c | 9 +-------- + 1 file changed, 1 insertion(+), 8 deletions(-) + +diff --git a/fs/io_uring.c b/fs/io_uring.c +index e68d27829bb2..d2e86788c872 100644 +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -2738,19 +2738,12 @@ static void io_iopoll_req_issued(struct io_kiocb *req) + ctx->poll_multi_queue = false; + } else if (!ctx->poll_multi_queue) { + struct io_kiocb *list_req; +- unsigned int queue_num0, queue_num1; + + list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb, + inflight_entry); + +- if (list_req->file != req->file) { ++ if (list_req->file != req->file) + ctx->poll_multi_queue = true; +- } else { +- queue_num0 = blk_qc_t_to_queue_num(list_req->rw.kiocb.ki_cookie); +- queue_num1 = blk_qc_t_to_queue_num(req->rw.kiocb.ki_cookie); +- if (queue_num0 != queue_num1) +- ctx->poll_multi_queue = true; +- } + } + + /* +-- +2.35.3 + diff --git a/patches.suse/io_uring-fix-io_try_cancel_userdata-race-for-iowq.patch b/patches.suse/io_uring-fix-io_try_cancel_userdata-race-for-iowq.patch index af70ab1..da442ea 100644 --- a/patches.suse/io_uring-fix-io_try_cancel_userdata-race-for-iowq.patch +++ b/patches.suse/io_uring-fix-io_try_cancel_userdata-race-for-iowq.patch @@ -1,17 +1,15 @@ -From 701d5faec2b8a69c4697d4d64a57828a6adb04a3 Mon Sep 17 00:00:00 2001 +From dadebc350da2bef62593b1df007a6e0b90baf42a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov -Date: Mon, 13 Sep 2021 09:18:44 -0600 +Date: Mon, 23 Aug 2021 13:30:44 +0100 Subject: [PATCH] io_uring: fix io_try_cancel_userdata race for iowq Git-commit: dadebc350da2bef62593b1df007a6e0b90baf42a Patch-mainline: v5.15-rc1 -References: stable-5.14.6 +References: bsc#1205572 -commit dadebc350da2bef62593b1df007a6e0b90baf42a upstream. - -Warning: CPU: 1 PID: 5870 at fs/io_uring.c:5975 io_try_cancel_userdata+0x30f/0x540 fs/io_uring.c:5975 -Cpu: 0 PID: 5870 Comm: iou-wrk-5860 Not tainted 5.14.0-rc6-next-20210820-syzkaller #0 +WARNING: CPU: 1 PID: 5870 at fs/io_uring.c:5975 io_try_cancel_userdata+0x30f/0x540 fs/io_uring.c:5975 +CPU: 0 PID: 5870 Comm: iou-wrk-5860 Not tainted 5.14.0-rc6-next-20210820-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 -Rip: 0010:io_try_cancel_userdata+0x30f/0x540 fs/io_uring.c:5975 +RIP: 0010:io_try_cancel_userdata+0x30f/0x540 fs/io_uring.c:5975 Call Trace: io_async_cancel fs/io_uring.c:6014 [inline] io_issue_sqe+0x22d5/0x65a0 fs/io_uring.c:6407 @@ -27,7 +25,7 @@ io_wq_put_and_exit() always first waits for all threads to complete, so the only detail left is to zero tctx->io_wq after the context is removed. -Note: one little assumption is that when IO_WQ_WORK_CANCEL, the executor +note: one little assumption is that when IO_WQ_WORK_CANCEL, the executor won't touch ->io_wq, because io_wq_destroy() might cancel left pending requests in such a way. 
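+
+Concretely, the cancellation path may now run from an io-wq worker thread,
+where current is the worker rather than the submitting task, so the sanity
+check is relaxed accordingly (the resulting check, quoted from the hunk
+below):
+
+	WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);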
@@ -36,18 +34,25 @@ Reported-by: syzbot+b0c9d1588ae92866515f@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/dfdd37a80cfa9ffd3e59538929c99cdd55d8699e.1629721757.git.asml.silence@gmail.com Signed-off-by: Jens Axboe -Signed-off-by: Greg Kroah-Hartman -Acked-by: Takashi Iwai - +Signed-off-by: Gabriel Krisman Bertazi --- - fs/io_uring.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) + fs/io_uring.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c -index 0481654767a5..c5d4638f6d7f 100644 +index 827e60ae4909..6859438c4e09 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c -@@ -6310,6 +6310,7 @@ static void io_wq_submit_work(struct io_wq_work *work) +@@ -5863,7 +5863,7 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr) + struct io_ring_ctx *ctx = req->ctx; + int ret; + +- WARN_ON_ONCE(req->task != current); ++ WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current); + + ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx); + if (ret != -ENOENT) +@@ -6369,6 +6369,7 @@ static void io_wq_submit_work(struct io_wq_work *work) if (timeout) io_queue_linked_timeout(timeout); @@ -55,7 +60,7 @@ index 0481654767a5..c5d4638f6d7f 100644 if (work->flags & IO_WQ_WORK_CANCEL) ret = -ECANCELED; -@@ -9137,8 +9138,8 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx) +@@ -9184,8 +9185,8 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx) * Must be after io_uring_del_task_file() (removes nodes under * uring_lock) to avoid race with io_uring_try_cancel_iowq(). */ @@ -66,5 +71,5 @@ index 0481654767a5..c5d4638f6d7f 100644 } -- -2.26.2 +2.35.3 diff --git a/patches.suse/io_uring-utilize-the-io-batching-infrastructure-for-.patch b/patches.suse/io_uring-utilize-the-io-batching-infrastructure-for-.patch new file mode 100644 index 0000000..6be21a2 --- /dev/null +++ b/patches.suse/io_uring-utilize-the-io-batching-infrastructure-for-.patch @@ -0,0 +1,60 @@ +From: Jens Axboe +Date: Tue, 12 Oct 2021 09:28:46 -0600 +Subject: [PATCH] io_uring: utilize the io batching infrastructure for more + efficient polled IO +Git-commit: b688f11e86c9a22169a0e522530982735d2db19b +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Wire up using an io_comp_batch for f_op->iopoll(). If the lower stack +supports it, we can handle high rates of polled IO more efficiently. + +This raises the single core efficiency on my system from ~6.1M IOPS to +~6.6M IOPS running a random read workload at depth 128 on two gen2 +Optane drives. 
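+
+The shape of the change: declare a batch on the stack, hand it to ->iopoll()
+so the driver can collect completed requests instead of completing them one
+by one, then run the batch completion once polling stops. In outline
+(condensed from the hunk below, not a drop-in snippet):
+
+	DEFINE_IO_COMP_BATCH(iob);
+
+	ret = kiocb->ki_filp->f_op->iopoll(kiocb, &iob, poll_flags);
+	/* ... per-request bookkeeping elided ... */
+	if (!rq_list_empty(iob.req_list))
+		iob.complete(&iob);	/* complete all batched requests at once */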
+ +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/io_uring.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/fs/io_uring.c b/fs/io_uring.c +index cd77a137f2d8..d4631a55a692 100644 +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -2458,6 +2458,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, + { + struct io_kiocb *req, *tmp; + unsigned int poll_flags = BLK_POLL_NOSLEEP; ++ DEFINE_IO_COMP_BATCH(iob); + LIST_HEAD(done); + + /* +@@ -2483,17 +2484,20 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, + if (!list_empty(&done)) + break; + +- ret = kiocb->ki_filp->f_op->iopoll(kiocb, NULL, poll_flags); ++ ret = kiocb->ki_filp->f_op->iopoll(kiocb, &iob, poll_flags); + if (unlikely(ret < 0)) + return ret; + else if (ret) + poll_flags |= BLK_POLL_ONESHOT; + + /* iopoll may have completed current req */ +- if (READ_ONCE(req->iopoll_completed)) ++ if (!rq_list_empty(iob.req_list) || ++ READ_ONCE(req->iopoll_completed)) + list_move_tail(&req->inflight_entry, &done); + } + ++ if (!rq_list_empty(iob.req_list)) ++ iob.complete(&iob); + if (!list_empty(&done)) + io_iopoll_complete(ctx, nr_events, &done); + +-- +2.35.3 + diff --git a/patches.suse/iomap-Fix-some-typos-and-bad-grammar.patch b/patches.suse/iomap-Fix-some-typos-and-bad-grammar.patch new file mode 100644 index 0000000..bd1db9e --- /dev/null +++ b/patches.suse/iomap-Fix-some-typos-and-bad-grammar.patch @@ -0,0 +1,237 @@ +From: Andreas Gruenbacher +Date: Mon, 2 Aug 2021 14:46:31 -0700 +Subject: [PATCH] iomap: Fix some typos and bad grammar +Git-commit: f1f264b4c134ee65cdadece7a20f3c0643602a4a +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Fix some typos and bad grammar in buffered-io.c to make the comments +easier to read. + +Signed-off-by: Andreas Gruenbacher +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/buffered-io.c | 72 +++++++++++++++++++++--------------------- + 1 file changed, 36 insertions(+), 36 deletions(-) + +diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c +index 28cfa7fab023..c1c8cd41ea81 100644 +--- a/fs/iomap/buffered-io.c ++++ b/fs/iomap/buffered-io.c +@@ -36,7 +36,7 @@ static inline struct iomap_page *to_iomap_page(struct page *page) + { + /* + * per-block data is stored in the head page. Callers should +- * not be dealing with tail pages (and if they are, they can ++ * not be dealing with tail pages, and if they are, they can + * call thp_head() first. + */ + VM_BUG_ON_PGFLAGS(PageTail(page), page); +@@ -98,7 +98,7 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop, + unsigned last = (poff + plen - 1) >> block_bits; + + /* +- * If the block size is smaller than the page size we need to check the ++ * If the block size is smaller than the page size, we need to check the + * per-block uptodate status and adjust the offset and length if needed + * to avoid reading in already uptodate ranges. + */ +@@ -126,7 +126,7 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop, + } + + /* +- * If the extent spans the block that contains the i_size we need to ++ * If the extent spans the block that contains the i_size, we need to + * handle both halves separately so that we properly zero data in the + * page cache for blocks that are entirely outside of i_size. 
+ */ +@@ -301,7 +301,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, + done: + /* + * Move the caller beyond our range so that it keeps making progress. +- * For that we have to include any leading non-uptodate ranges, but ++ * For that, we have to include any leading non-uptodate ranges, but + * we can skip trailing ones as they will be handled in the next + * iteration. + */ +@@ -338,9 +338,9 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops) + } + + /* +- * Just like mpage_readahead and block_read_full_page we always ++ * Just like mpage_readahead and block_read_full_page, we always + * return 0 and just mark the page as PageError on errors. This +- * should be cleaned up all through the stack eventually. ++ * should be cleaned up throughout the stack eventually. + */ + return 0; + } +@@ -461,7 +461,7 @@ iomap_releasepage(struct page *page, gfp_t gfp_mask) + /* + * mm accommodates an old ext3 case where clean pages might not have had + * the dirty bit cleared. Thus, it can send actual dirty pages to +- * ->releasepage() via shrink_active_list(), skip those here. ++ * ->releasepage() via shrink_active_list(); skip those here. + */ + if (PageDirty(page) || PageWriteback(page)) + return 0; +@@ -476,7 +476,7 @@ iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len) + trace_iomap_invalidatepage(page->mapping->host, offset, len); + + /* +- * If we are invalidating the entire page, clear the dirty state from it ++ * If we're invalidating the entire page, clear the dirty state from it + * and release it to avoid unnecessary buildup of the LRU. + */ + if (offset == 0 && len == PAGE_SIZE) { +@@ -658,13 +658,13 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len, + /* + * The blocks that were entirely written will now be uptodate, so we + * don't have to worry about a readpage reading them and overwriting a +- * partial write. However if we have encountered a short write and only ++ * partial write. However, if we've encountered a short write and only + * partially written into a block, it will not be marked uptodate, so a + * readpage might come in and destroy our partial write. + * +- * Do the simplest thing, and just treat any short write to a non +- * uptodate page as a zero-length write, and force the caller to redo +- * the whole thing. ++ * Do the simplest thing and just treat any short write to a ++ * non-uptodate page as a zero-length write, and force the caller to ++ * redo the whole thing. + */ + if (unlikely(copied < len && !PageUptodate(page))) + return 0; +@@ -752,7 +752,7 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, + bytes = length; + + /* +- * Bring in the user page that we will copy from _first_. ++ * Bring in the user page that we'll copy from _first_. + * Otherwise there's a nasty deadlock on copying from the + * same page as we're writing to, without it being marked + * up-to-date. +@@ -1161,7 +1161,7 @@ static void iomap_writepage_end_bio(struct bio *bio) + * Submit the final bio for an ioend. + * + * If @error is non-zero, it means that we have a situation where some part of +- * the submission process has failed after we have marked paged for writeback ++ * the submission process has failed after we've marked pages for writeback + * and unlocked them. In this situation, we need to fail the bio instead of + * submitting it. This typically only happens on a filesystem shutdown. 
+ */ +@@ -1176,7 +1176,7 @@ iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend, + error = wpc->ops->prepare_ioend(ioend, error); + if (error) { + /* +- * If we are failing the IO now, just mark the ioend with an ++ * If we're failing the IO now, just mark the ioend with an + * error and finish it. This will run IO completion immediately + * as there is only one reference to the ioend at this point in + * time. +@@ -1218,7 +1218,7 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc, + /* + * Allocate a new bio, and chain the old bio to the new one. + * +- * Note that we have to do perform the chaining in this unintuitive order ++ * Note that we have to perform the chaining in this unintuitive order + * so that the bi_private linkage is set up in the right direction for the + * traversal in iomap_finish_ioend(). + */ +@@ -1257,7 +1257,7 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset, + + /* + * Test to see if we have an existing ioend structure that we could append to +- * first, otherwise finish off the current ioend and start another. ++ * first; otherwise finish off the current ioend and start another. + */ + static void + iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page, +@@ -1288,9 +1288,9 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page, + /* + * We implement an immediate ioend submission policy here to avoid needing to + * chain multiple ioends and hence nest mempool allocations which can violate +- * forward progress guarantees we need to provide. The current ioend we are +- * adding blocks to is cached on the writepage context, and if the new block +- * does not append to the cached ioend it will create a new ioend and cache that ++ * the forward progress guarantees we need to provide. The current ioend we're ++ * adding blocks to is cached in the writepage context, and if the new block ++ * doesn't append to the cached ioend, it will create a new ioend and cache that + * instead. + * + * If a new ioend is created and cached, the old ioend is returned and queued +@@ -1352,7 +1352,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, + if (unlikely(error)) { + /* + * Let the filesystem know what portion of the current page +- * failed to map. If the page wasn't been added to ioend, it ++ * failed to map. If the page hasn't been added to ioend, it + * won't be affected by I/O completion and we must unlock it + * now. + */ +@@ -1369,7 +1369,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, + unlock_page(page); + + /* +- * Preserve the original error if there was one, otherwise catch ++ * Preserve the original error if there was one; catch + * submission errors here and propagate into subsequent ioend + * submissions. + */ +@@ -1396,8 +1396,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, + /* + * Write out a dirty page. + * +- * For delalloc space on the page we need to allocate space and flush it. +- * For unwritten space on the page we need to start the conversion to ++ * For delalloc space on the page, we need to allocate space and flush it. ++ * For unwritten space on the page, we need to start the conversion to + * regular allocated space. + */ + static int +@@ -1412,7 +1412,7 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) + trace_iomap_writepage(inode, page_offset(page), PAGE_SIZE); + + /* +- * Refuse to write the page out if we are called from reclaim context. 
++ * Refuse to write the page out if we're called from reclaim context. + * + * This avoids stack overflows when called from deeply used stacks in + * random callers for direct reclaim or memcg reclaim. We explicitly +@@ -1457,20 +1457,20 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) + unsigned offset_into_page = offset & (PAGE_SIZE - 1); + + /* +- * Skip the page if it is fully outside i_size, e.g. due to a +- * truncate operation that is in progress. We must redirty the ++ * Skip the page if it's fully outside i_size, e.g. due to a ++ * truncate operation that's in progress. We must redirty the + * page so that reclaim stops reclaiming it. Otherwise + * iomap_vm_releasepage() is called on it and gets confused. + * +- * Note that the end_index is unsigned long, it would overflow +- * if the given offset is greater than 16TB on 32-bit system +- * and if we do check the page is fully outside i_size or not +- * via "if (page->index >= end_index + 1)" as "end_index + 1" +- * will be evaluated to 0. Hence this page will be redirtied +- * and be written out repeatedly which would result in an +- * infinite loop, the user program that perform this operation +- * will hang. Instead, we can verify this situation by checking +- * if the page to write is totally beyond the i_size or if it's ++ * Note that the end_index is unsigned long. If the given ++ * offset is greater than 16TB on a 32-bit system then if we ++ * checked if the page is fully outside i_size with ++ * "if (page->index >= end_index + 1)", "end_index + 1" would ++ * overflow and evaluate to 0. Hence this page would be ++ * redirtied and written out repeatedly, which would result in ++ * an infinite loop; the user program performing this operation ++ * would hang. Instead, we can detect this situation by ++ * checking if the page is totally beyond i_size or if its + * offset is just equal to the EOF. + */ + if (page->index > end_index || +-- +2.35.3 + diff --git a/patches.suse/iomap-Support-inline-data-with-block-size-page-size.patch b/patches.suse/iomap-Support-inline-data-with-block-size-page-size.patch new file mode 100644 index 0000000..e2be4bf --- /dev/null +++ b/patches.suse/iomap-Support-inline-data-with-block-size-page-size.patch @@ -0,0 +1,96 @@ +From: "Matthew Wilcox (Oracle)" +Date: Mon, 2 Aug 2021 14:45:57 -0700 +Subject: [PATCH] iomap: Support inline data with block size < page size +Git-commit: b405435b419cb660455ba54fd47086216e58fed6 +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Remove the restriction that inline data must start on a page boundary +in a file. This allows, for example, the first 2KiB to be stored out +of line and the trailing 30 bytes to be stored inline. + +Signed-off-by: Matthew Wilcox (Oracle) +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. 
Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/buffered-io.c | 34 ++++++++++++++++------------------ + 1 file changed, 16 insertions(+), 18 deletions(-) + +diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c +index 1d31ff6bfea0..28cfa7fab023 100644 +--- a/fs/iomap/buffered-io.c ++++ b/fs/iomap/buffered-io.c +@@ -209,28 +209,26 @@ static int iomap_read_inline_data(struct inode *inode, struct page *page, + struct iomap *iomap) + { + size_t size = i_size_read(inode) - iomap->offset; ++ size_t poff = offset_in_page(iomap->offset); + void *addr; + + if (PageUptodate(page)) +- return 0; ++ return PAGE_SIZE - poff; + +- /* inline data must start page aligned in the file */ +- if (WARN_ON_ONCE(offset_in_page(iomap->offset))) +- return -EIO; + if (WARN_ON_ONCE(size > PAGE_SIZE - + offset_in_page(iomap->inline_data))) + return -EIO; + if (WARN_ON_ONCE(size > iomap->length)) + return -EIO; +- if (WARN_ON_ONCE(page_has_private(page))) +- return -EIO; ++ if (poff > 0) ++ iomap_page_create(inode, page); + +- addr = kmap_atomic(page); ++ addr = kmap_atomic(page) + poff; + memcpy(addr, iomap->inline_data, size); +- memset(addr + size, 0, PAGE_SIZE - size); ++ memset(addr + size, 0, PAGE_SIZE - poff - size); + kunmap_atomic(addr); +- SetPageUptodate(page); +- return 0; ++ iomap_set_range_uptodate(page, poff, PAGE_SIZE - poff); ++ return PAGE_SIZE - poff; + } + + static inline bool iomap_block_needs_zeroing(struct inode *inode, +@@ -252,13 +250,8 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, + unsigned poff, plen; + sector_t sector; + +- if (iomap->type == IOMAP_INLINE) { +- int ret = iomap_read_inline_data(inode, page, iomap); +- +- if (ret) +- return ret; +- return PAGE_SIZE; +- } ++ if (iomap->type == IOMAP_INLINE) ++ return iomap_read_inline_data(inode, page, iomap); + + /* zero post-eof blocks as the page may be mapped */ + iop = iomap_page_create(inode, page); +@@ -593,10 +586,15 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags, + static int iomap_write_begin_inline(struct inode *inode, + struct page *page, struct iomap *srcmap) + { ++ int ret; ++ + /* needs more work for the tailpacking case; disable for now */ + if (WARN_ON_ONCE(srcmap->offset != 0)) + return -EIO; +- return iomap_read_inline_data(inode, page, srcmap); ++ ret = iomap_read_inline_data(inode, page, srcmap); ++ if (ret < 0) ++ return ret; ++ return 0; + } + + static int +-- +2.35.3 + diff --git a/patches.suse/iomap-add-the-new-iomap_iter-model.patch b/patches.suse/iomap-add-the-new-iomap_iter-model.patch new file mode 100644 index 0000000..3da1667 --- /dev/null +++ b/patches.suse/iomap-add-the-new-iomap_iter-model.patch @@ -0,0 +1,252 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:07 -0700 +Subject: [PATCH] iomap: add the new iomap_iter model +Git-commit: f4b896c213f0752adc828ddc11bd55419ffab248 +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +The iomap_iter struct provides a convenient way to package up and +maintain all the arguments to the various mapping and operation +functions. It is operated on using the iomap_iter() function that +is called in loop until the whole range has been processed. Compared +to the existing iomap_apply() function this avoid an indirect call +for each iteration. + +For now iomap_iter() calls back into the existing ->iomap_begin and +->iomap_end methods, but in the future this could be further optimized +to avoid indirect calls entirely. + +Based on an earlier patch from Matthew Wilcox . 
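+
+A typical caller then reduces to a small loop: iomap_iter() consumes
+iter.processed, advances pos/len, fetches the next mapping from the
+filesystem and returns 1 while more work remains. Sketch of the calling
+convention (do_one_range() is a stand-in for the caller's per-mapping body,
+not a real function):
+
+	struct iomap_iter iter = {
+		.inode = inode,
+		.pos   = pos,
+		.len   = len,
+		.flags = flags,
+	};
+	int ret;
+
+	while ((ret = iomap_iter(&iter, ops)) > 0)
+		iter.processed = do_one_range(&iter);	/* bytes handled or -errno */
+	return ret;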
+ +Signed-off-by: Christoph Hellwig +[djwong: add to apply.c to preserve git history of iomap loop control] +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Acked-by: Hannes Reinecke +--- + fs/iomap/apply.c | 74 ++++++++++++++++++++++++++++++++++++++++++- + fs/iomap/trace.h | 37 +++++++++++++++++++++- + include/linux/iomap.h | 56 ++++++++++++++++++++++++++++++++ + 3 files changed, 165 insertions(+), 2 deletions(-) + +diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c +index 26ab6563181f..e82647aef7ea 100644 +--- a/fs/iomap/apply.c ++++ b/fs/iomap/apply.c +@@ -1,7 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* + * Copyright (C) 2010 Red Hat, Inc. +- * Copyright (c) 2016-2018 Christoph Hellwig. ++ * Copyright (c) 2016-2021 Christoph Hellwig. + */ + #include + #include +@@ -97,3 +97,75 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags, + + return written ? written : ret; + } ++ ++static inline int iomap_iter_advance(struct iomap_iter *iter) ++{ ++ /* handle the previous iteration (if any) */ ++ if (iter->iomap.length) { ++ if (iter->processed <= 0) ++ return iter->processed; ++ if (WARN_ON_ONCE(iter->processed > iomap_length(iter))) ++ return -EIO; ++ iter->pos += iter->processed; ++ iter->len -= iter->processed; ++ if (!iter->len) ++ return 0; ++ } ++ ++ /* clear the state for the next iteration */ ++ iter->processed = 0; ++ memset(&iter->iomap, 0, sizeof(iter->iomap)); ++ memset(&iter->srcmap, 0, sizeof(iter->srcmap)); ++ return 1; ++} ++ ++static inline void iomap_iter_done(struct iomap_iter *iter) ++{ ++ WARN_ON_ONCE(iter->iomap.offset > iter->pos); ++ WARN_ON_ONCE(iter->iomap.length == 0); ++ WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos); ++ ++ trace_iomap_iter_dstmap(iter->inode, &iter->iomap); ++ if (iter->srcmap.type != IOMAP_HOLE) ++ trace_iomap_iter_srcmap(iter->inode, &iter->srcmap); ++} ++ ++/** ++ * iomap_iter - iterate over a ranges in a file ++ * @iter: iteration structue ++ * @ops: iomap ops provided by the file system ++ * ++ * Iterate over filesystem-provided space mappings for the provided file range. ++ * ++ * This function handles cleanup of resources acquired for iteration when the ++ * filesystem indicates there are no more space mappings, which means that this ++ * function must be called in a loop that continues as long it returns a ++ * positive value. If 0 or a negative value is returned, the caller must not ++ * return to the loop body. Within a loop body, there are two ways to break out ++ * of the loop body: leave @iter.processed unchanged, or set it to a negative ++ * errno. ++ */ ++int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops) ++{ ++ int ret; ++ ++ if (iter->iomap.length && ops->iomap_end) { ++ ret = ops->iomap_end(iter->inode, iter->pos, iomap_length(iter), ++ iter->processed > 0 ? 
iter->processed : 0, ++ iter->flags, &iter->iomap); ++ if (ret < 0 && !iter->processed) ++ return ret; ++ } ++ ++ trace_iomap_iter(iter, ops, _RET_IP_); ++ ret = iomap_iter_advance(iter); ++ if (ret <= 0) ++ return ret; ++ ++ ret = ops->iomap_begin(iter->inode, iter->pos, iter->len, iter->flags, ++ &iter->iomap, &iter->srcmap); ++ if (ret < 0) ++ return ret; ++ iomap_iter_done(iter); ++ return 1; ++} +diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h +index e9cd5cc0d6ba..1012d7af6b68 100644 +--- a/fs/iomap/trace.h ++++ b/fs/iomap/trace.h +@@ -1,6 +1,6 @@ + /* SPDX-License-Identifier: GPL-2.0 */ + /* +- * Copyright (c) 2009-2019 Christoph Hellwig ++ * Copyright (c) 2009-2021 Christoph Hellwig + * + * NOTE: none of these tracepoints shall be considered a stable kernel ABI + * as they can change at any time. +@@ -140,6 +140,8 @@ DEFINE_EVENT(iomap_class, name, \ + TP_ARGS(inode, iomap)) + DEFINE_IOMAP_EVENT(iomap_apply_dstmap); + DEFINE_IOMAP_EVENT(iomap_apply_srcmap); ++DEFINE_IOMAP_EVENT(iomap_iter_dstmap); ++DEFINE_IOMAP_EVENT(iomap_iter_srcmap); + + TRACE_EVENT(iomap_apply, + TP_PROTO(struct inode *inode, loff_t pos, loff_t length, +@@ -179,6 +181,39 @@ TRACE_EVENT(iomap_apply, + __entry->actor) + ); + ++TRACE_EVENT(iomap_iter, ++ TP_PROTO(struct iomap_iter *iter, const void *ops, ++ unsigned long caller), ++ TP_ARGS(iter, ops, caller), ++ TP_STRUCT__entry( ++ __field(dev_t, dev) ++ __field(u64, ino) ++ __field(loff_t, pos) ++ __field(loff_t, length) ++ __field(unsigned int, flags) ++ __field(const void *, ops) ++ __field(unsigned long, caller) ++ ), ++ TP_fast_assign( ++ __entry->dev = iter->inode->i_sb->s_dev; ++ __entry->ino = iter->inode->i_ino; ++ __entry->pos = iter->pos; ++ __entry->length = iomap_length(iter); ++ __entry->flags = iter->flags; ++ __entry->ops = ops; ++ __entry->caller = caller; ++ ), ++ TP_printk("dev %d:%d ino 0x%llx pos %lld length %lld flags %s (0x%x) ops %ps caller %pS", ++ MAJOR(__entry->dev), MINOR(__entry->dev), ++ __entry->ino, ++ __entry->pos, ++ __entry->length, ++ __print_flags(__entry->flags, "|", IOMAP_FLAGS_STRINGS), ++ __entry->flags, ++ __entry->ops, ++ (void *)__entry->caller) ++); ++ + #endif /* _IOMAP_TRACE_H */ + + #undef TRACE_INCLUDE_PATH +diff --git a/include/linux/iomap.h b/include/linux/iomap.h +index 76bfc5d16ef4..aac4176ea164 100644 +--- a/include/linux/iomap.h ++++ b/include/linux/iomap.h +@@ -161,6 +161,62 @@ struct iomap_ops { + ssize_t written, unsigned flags, struct iomap *iomap); + }; + ++/** ++ * struct iomap_iter - Iterate through a range of a file ++ * @inode: Set at the start of the iteration and should not change. ++ * @pos: The current file position we are operating on. It is updated by ++ * calls to iomap_iter(). Treat as read-only in the body. ++ * @len: The remaining length of the file segment we're operating on. ++ * It is updated at the same time as @pos. ++ * @processed: The number of bytes processed by the body in the most recent ++ * iteration, or a negative errno. 0 causes the iteration to stop. ++ * @flags: Zero or more of the iomap_begin flags above. 
++ * @iomap: Map describing the I/O iteration ++ * @srcmap: Source map for COW operations ++ */ ++struct iomap_iter { ++ struct inode *inode; ++ loff_t pos; ++ u64 len; ++ s64 processed; ++ unsigned flags; ++ struct iomap iomap; ++ struct iomap srcmap; ++}; ++ ++int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops); ++ ++/** ++ * iomap_length - length of the current iomap iteration ++ * @iter: iteration structure ++ * ++ * Returns the length that the operation applies to for the current iteration. ++ */ ++static inline u64 iomap_length(const struct iomap_iter *iter) ++{ ++ u64 end = iter->iomap.offset + iter->iomap.length; ++ ++ if (iter->srcmap.type != IOMAP_HOLE) ++ end = min(end, iter->srcmap.offset + iter->srcmap.length); ++ return min(iter->len, end - iter->pos); ++} ++ ++/** ++ * iomap_iter_srcmap - return the source map for the current iomap iteration ++ * @i: iteration structure ++ * ++ * Write operations on file systems with reflink support might require a ++ * source and a destination map. This function retourns the source map ++ * for a given operation, which may or may no be identical to the destination ++ * map in &i->iomap. ++ */ ++static inline struct iomap *iomap_iter_srcmap(struct iomap_iter *i) ++{ ++ if (i->srcmap.type != IOMAP_HOLE) ++ return &i->srcmap; ++ return &i->iomap; ++} ++ + /* + * Main iomap iterator function. + */ +-- +2.35.3 + diff --git a/patches.suse/iomap-don-t-try-to-poll-multi-bio-I-Os-in-__iomap_di.patch b/patches.suse/iomap-don-t-try-to-poll-multi-bio-I-Os-in-__iomap_di.patch new file mode 100644 index 0000000..e53dc31 --- /dev/null +++ b/patches.suse/iomap-don-t-try-to-poll-multi-bio-I-Os-in-__iomap_di.patch @@ -0,0 +1,69 @@ +From: Christoph Hellwig +Date: Tue, 12 Oct 2021 13:12:13 +0200 +Subject: [PATCH] iomap: don't try to poll multi-bio I/Os in __iomap_dio_rw +Git-commit: f79d474905fec0bfae1244e75571b7916fe02ea2 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +If an iocb is split into multiple bios we can't poll for both. So don't +bother to even try to poll in that case. + +Signed-off-by: Christoph Hellwig +Tested-by: Mark Wunderlich +Link: https://lore.kernel.org/r/20211012111226.760968-4-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/iomap/direct-io.c | 21 ++++++++++++++++++++- + 1 file changed, 20 insertions(+), 1 deletion(-) + +diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c +index 4ecd255e0511..560ae967f70e 100644 +--- a/fs/iomap/direct-io.c ++++ b/fs/iomap/direct-io.c +@@ -282,6 +282,13 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, + if (!iov_iter_count(dio->submit.iter)) + goto out; + ++ /* ++ * We can only poll for single bio I/Os. ++ */ ++ if (need_zeroout || ++ ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) ++ dio->iocb->ki_flags &= ~IOCB_HIPRI; ++ + if (need_zeroout) { + /* zero out from the start of the block to the write offset */ + pad = pos & (fs_block_size - 1); +@@ -339,6 +346,11 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, + + nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, + BIO_MAX_VECS); ++ /* ++ * We can only poll for single bio I/Os. 
++ */ ++ if (nr_pages) ++ dio->iocb->ki_flags &= ~IOCB_HIPRI; + iomap_dio_submit_bio(iter, dio, bio, pos); + pos += n; + } while (nr_pages); +@@ -565,8 +577,15 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + inode_dio_begin(inode); + + blk_start_plug(&plug); +- while ((ret = iomap_iter(&iomi, ops)) > 0) ++ while ((ret = iomap_iter(&iomi, ops)) > 0) { + iomi.processed = iomap_dio_iter(&iomi, dio); ++ ++ /* ++ * We can only poll for single bio I/Os. ++ */ ++ iocb->ki_flags &= ~IOCB_HIPRI; ++ } ++ + blk_finish_plug(&plug); + + /* +-- +2.35.3 + diff --git a/patches.suse/iomap-fix-a-trivial-comment-typo-in-trace.h.patch b/patches.suse/iomap-fix-a-trivial-comment-typo-in-trace.h.patch new file mode 100644 index 0000000..7acbd03 --- /dev/null +++ b/patches.suse/iomap-fix-a-trivial-comment-typo-in-trace.h.patch @@ -0,0 +1,31 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:03 -0700 +Subject: [PATCH] iomap: fix a trivial comment typo in trace.h +Git-commit: d9d381f3ef5b2a4bee3e98d7b9f3b09cf00119c0 +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/trace.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h +index fdc7ae388476..e9cd5cc0d6ba 100644 +--- a/fs/iomap/trace.h ++++ b/fs/iomap/trace.h +@@ -2,7 +2,7 @@ + /* + * Copyright (c) 2009-2019 Christoph Hellwig + * +- * NOTE: none of these tracepoints shall be consider a stable kernel ABI ++ * NOTE: none of these tracepoints shall be considered a stable kernel ABI + * as they can change at any time. + */ + #undef TRACE_SYSTEM +-- +2.35.3 + diff --git a/patches.suse/iomap-fix-the-iomap_readpage_actor-return-value-for-.patch b/patches.suse/iomap-fix-the-iomap_readpage_actor-return-value-for-.patch new file mode 100644 index 0000000..b2a42ce --- /dev/null +++ b/patches.suse/iomap-fix-the-iomap_readpage_actor-return-value-for-.patch @@ -0,0 +1,45 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:07 -0700 +Subject: [PATCH] iomap: fix the iomap_readpage_actor return value for inline + data +Git-commit: 740499c78408f75c4e76feac848177cb0d0ccf4f +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +The actor should never return a larger value than the length that was +passed in. The current code handles this gracefully, but the opcoming +iter model will be more picky. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. 
Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/buffered-io.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c +index 0273aede8b1d..8418dffe8acf 100644 +--- a/fs/iomap/buffered-io.c ++++ b/fs/iomap/buffered-io.c +@@ -205,7 +205,7 @@ struct iomap_readpage_ctx { + struct readahead_control *rac; + }; + +-static int iomap_read_inline_data(struct inode *inode, struct page *page, ++static loff_t iomap_read_inline_data(struct inode *inode, struct page *page, + const struct iomap *iomap) + { + size_t size = i_size_read(inode) - iomap->offset; +@@ -253,7 +253,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, + sector_t sector; + + if (iomap->type == IOMAP_INLINE) +- return iomap_read_inline_data(inode, page, iomap); ++ return min(iomap_read_inline_data(inode, page, iomap), length); + + /* zero post-eof blocks as the page may be mapped */ + iop = iomap_page_create(inode, page); +-- +2.35.3 + diff --git a/patches.suse/iomap-mark-the-iomap-argument-to-iomap_inline_data-c.patch b/patches.suse/iomap-mark-the-iomap-argument-to-iomap_inline_data-c.patch new file mode 100644 index 0000000..e045712 --- /dev/null +++ b/patches.suse/iomap-mark-the-iomap-argument-to-iomap_inline_data-c.patch @@ -0,0 +1,31 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:04 -0700 +Subject: [PATCH] iomap: mark the iomap argument to iomap_inline_data const +Git-commit: 4495c33e4d302b8d3a9eb483c06b2687d27dab9d +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Hannes Reinecke +--- + include/linux/iomap.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/linux/iomap.h b/include/linux/iomap.h +index 8030483331d1..560247130357 100644 +--- a/include/linux/iomap.h ++++ b/include/linux/iomap.h +@@ -99,7 +99,7 @@ static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos) + /* + * Returns the inline data pointer for logical offset @pos. + */ +-static inline void *iomap_inline_data(struct iomap *iomap, loff_t pos) ++static inline void *iomap_inline_data(const struct iomap *iomap, loff_t pos) + { + return iomap->inline_data + pos - iomap->offset; + } +-- +2.35.3 + diff --git a/patches.suse/iomap-mark-the-iomap-argument-to-iomap_inline_data_v.patch b/patches.suse/iomap-mark-the-iomap-argument-to-iomap_inline_data_v.patch new file mode 100644 index 0000000..49c96e4 --- /dev/null +++ b/patches.suse/iomap-mark-the-iomap-argument-to-iomap_inline_data_v.patch @@ -0,0 +1,32 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:05 -0700 +Subject: [PATCH] iomap: mark the iomap argument to iomap_inline_data_valid + const +Git-commit: e3c4ffb0c2219e720acdc6072c6ddaccac5cab79 +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Hannes Reinecke +--- + include/linux/iomap.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/linux/iomap.h b/include/linux/iomap.h +index 560247130357..76bfc5d16ef4 100644 +--- a/include/linux/iomap.h ++++ b/include/linux/iomap.h +@@ -109,7 +109,7 @@ static inline void *iomap_inline_data(const struct iomap *iomap, loff_t pos) + * This is used to guard against accessing data beyond the page inline_data + * points at. 
+ */ +-static inline bool iomap_inline_data_valid(struct iomap *iomap) ++static inline bool iomap_inline_data_valid(const struct iomap *iomap) + { + return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data); + } +-- +2.35.3 + diff --git a/patches.suse/iomap-mark-the-iomap-argument-to-iomap_read_inline_d.patch b/patches.suse/iomap-mark-the-iomap-argument-to-iomap_read_inline_d.patch new file mode 100644 index 0000000..2cf26b6 --- /dev/null +++ b/patches.suse/iomap-mark-the-iomap-argument-to-iomap_read_inline_d.patch @@ -0,0 +1,35 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:06 -0700 +Subject: [PATCH] iomap: mark the iomap argument to iomap_read_inline_data + const +Git-commit: 78c64b00f842ac704d0612553dd124c31b4afceb +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +iomap_read_inline_data never modifies the passed in iomap, so mark +it const. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/buffered-io.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c +index 7e794a30806b..b8a1ba3fb957 100644 +--- a/fs/iomap/buffered-io.c ++++ b/fs/iomap/buffered-io.c +@@ -206,7 +206,7 @@ struct iomap_readpage_ctx { + }; + + static int iomap_read_inline_data(struct inode *inode, struct page *page, +- struct iomap *iomap) ++ const struct iomap *iomap) + { + size_t size = i_size_read(inode) - iomap->offset; + size_t poff = offset_in_page(iomap->offset); +-- +2.35.3 + diff --git a/patches.suse/iomap-mark-the-iomap-argument-to-iomap_read_page_syn.patch b/patches.suse/iomap-mark-the-iomap-argument-to-iomap_read_page_syn.patch new file mode 100644 index 0000000..6555f88 --- /dev/null +++ b/patches.suse/iomap-mark-the-iomap-argument-to-iomap_read_page_syn.patch @@ -0,0 +1,34 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:06 -0700 +Subject: [PATCH] iomap: mark the iomap argument to iomap_read_page_sync const +Git-commit: 1acd9e9c015b389aa3201a977454efb92e36806c +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +iomap_read_page_sync never modifies the passed in iomap, so mark +it const. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/buffered-io.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c +index b8a1ba3fb957..0273aede8b1d 100644 +--- a/fs/iomap/buffered-io.c ++++ b/fs/iomap/buffered-io.c +@@ -531,7 +531,7 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) + + static int + iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff, +- unsigned plen, struct iomap *iomap) ++ unsigned plen, const struct iomap *iomap) + { + struct bio_vec bvec; + struct bio bio; +-- +2.35.3 + diff --git a/patches.suse/iomap-mark-the-iomap-argument-to-iomap_sector-const.patch b/patches.suse/iomap-mark-the-iomap-argument-to-iomap_sector-const.patch new file mode 100644 index 0000000..6df7047 --- /dev/null +++ b/patches.suse/iomap-mark-the-iomap-argument-to-iomap_sector-const.patch @@ -0,0 +1,32 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:04 -0700 +Subject: [PATCH] iomap: mark the iomap argument to iomap_sector const +Git-commit: 66b8165ed4b5a2e7ddb7b9bbf3586b7ccdd86a1c +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. 
Wong +Acked-by: Hannes Reinecke +--- + include/linux/iomap.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/include/linux/iomap.h b/include/linux/iomap.h +index 72696a55c137..8030483331d1 100644 +--- a/include/linux/iomap.h ++++ b/include/linux/iomap.h +@@ -91,8 +91,7 @@ struct iomap { + const struct iomap_page_ops *page_ops; + }; + +-static inline sector_t +-iomap_sector(struct iomap *iomap, loff_t pos) ++static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos) + { + return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT; + } +-- +2.35.3 + diff --git a/patches.suse/iomap-remove-the-iomap-arguments-to-page_-prepare-do.patch b/patches.suse/iomap-remove-the-iomap-arguments-to-page_-prepare-do.patch new file mode 100644 index 0000000..9858dbb --- /dev/null +++ b/patches.suse/iomap-remove-the-iomap-arguments-to-page_-prepare-do.patch @@ -0,0 +1,93 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:03 -0700 +Subject: [PATCH] iomap: remove the iomap arguments to ->page_{prepare,done} +Git-commit: 1d25d0aecfcd480b1a997a709c1b37e56ddc3c38 +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +These aren't actually used by the only instance implementing the methods. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Hannes Reinecke +--- + fs/gfs2/bmap.c | 5 ++--- + fs/iomap/buffered-io.c | 6 +++--- + include/linux/iomap.h | 5 ++--- + 3 files changed, 7 insertions(+), 9 deletions(-) + +diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c +index ed8b67b21718..5414c2c33580 100644 +--- a/fs/gfs2/bmap.c ++++ b/fs/gfs2/bmap.c +@@ -1002,7 +1002,7 @@ static void gfs2_write_unlock(struct inode *inode) + } + + static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos, +- unsigned len, struct iomap *iomap) ++ unsigned len) + { + unsigned int blockmask = i_blocksize(inode) - 1; + struct gfs2_sbd *sdp = GFS2_SB(inode); +@@ -1013,8 +1013,7 @@ static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos, + } + + static void gfs2_iomap_page_done(struct inode *inode, loff_t pos, +- unsigned copied, struct page *page, +- struct iomap *iomap) ++ unsigned copied, struct page *page) + { + struct gfs2_trans *tr = current->journal_info; + struct gfs2_inode *ip = GFS2_I(inode); +diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c +index 43b9354bac3a..7e794a30806b 100644 +--- a/fs/iomap/buffered-io.c ++++ b/fs/iomap/buffered-io.c +@@ -615,7 +615,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, + return -EINTR; + + if (page_ops && page_ops->page_prepare) { +- status = page_ops->page_prepare(inode, pos, len, iomap); ++ status = page_ops->page_prepare(inode, pos, len); + if (status) + return status; + } +@@ -648,7 +648,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, + + out_no_page: + if (page_ops && page_ops->page_done) +- page_ops->page_done(inode, pos, 0, NULL, iomap); ++ page_ops->page_done(inode, pos, 0, NULL); + return status; + } + +@@ -724,7 +724,7 @@ static size_t iomap_write_end(struct inode *inode, loff_t pos, size_t len, + if (old_size < pos) + pagecache_isize_extended(inode, old_size, pos); + if (page_ops && page_ops->page_done) +- page_ops->page_done(inode, pos, ret, page, iomap); ++ page_ops->page_done(inode, pos, ret, page); + put_page(page); + + if (ret < len) +diff --git a/include/linux/iomap.h b/include/linux/iomap.h +index b8ec145b2975..72696a55c137 100644 +--- a/include/linux/iomap.h ++++ 
b/include/linux/iomap.h +@@ -126,10 +126,9 @@ static inline bool iomap_inline_data_valid(struct iomap *iomap) + * associated page could not be obtained. + */ + struct iomap_page_ops { +- int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len, +- struct iomap *iomap); ++ int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len); + void (*page_done)(struct inode *inode, loff_t pos, unsigned copied, +- struct page *page, struct iomap *iomap); ++ struct page *page); + }; + + /* +-- +2.35.3 + diff --git a/patches.suse/iomap-support-reading-inline-data-from-non-zero-pos.patch b/patches.suse/iomap-support-reading-inline-data-from-non-zero-pos.patch new file mode 100644 index 0000000..da04725 --- /dev/null +++ b/patches.suse/iomap-support-reading-inline-data-from-non-zero-pos.patch @@ -0,0 +1,191 @@ +From: Gao Xiang +Date: Tue, 3 Aug 2021 09:38:22 -0700 +Subject: [PATCH] iomap: support reading inline data from non-zero pos +Git-commit: 69f4a26c1e0c7c5e5e77c5bd7b271743c124c545 +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +The existing inline data support only works for cases where the entire +file is stored as inline data. For larger files, EROFS stores the +initial blocks separately and the remainder of the file ("file tail") +adjacent to the inode. Generalise inline data to allow reading the +inline file tail. Tails may not cross a page boundary in memory. + +We currently have no filesystems that support tails and writing, +so that case is currently disabled (see iomap_write_begin_inline). + +Reviewed-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Reviewed-by: Matthew Wilcox (Oracle) +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Gao Xiang +Signed-off-by: Darrick J. Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/buffered-io.c | 42 ++++++++++++++++++++++++++++++------------ + fs/iomap/direct-io.c | 10 ++++++---- + include/linux/iomap.h | 18 ++++++++++++++++++ + 3 files changed, 54 insertions(+), 16 deletions(-) + +diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c +index a463b41c0a16..1d31ff6bfea0 100644 +--- a/fs/iomap/buffered-io.c ++++ b/fs/iomap/buffered-io.c +@@ -205,25 +205,32 @@ struct iomap_readpage_ctx { + struct readahead_control *rac; + }; + +-static void +-iomap_read_inline_data(struct inode *inode, struct page *page, ++static int iomap_read_inline_data(struct inode *inode, struct page *page, + struct iomap *iomap) + { +- size_t size = i_size_read(inode); ++ size_t size = i_size_read(inode) - iomap->offset; + void *addr; + + if (PageUptodate(page)) +- return; ++ return 0; + +- BUG_ON(page_has_private(page)); +- BUG_ON(page->index); +- BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data)); ++ /* inline data must start page aligned in the file */ ++ if (WARN_ON_ONCE(offset_in_page(iomap->offset))) ++ return -EIO; ++ if (WARN_ON_ONCE(size > PAGE_SIZE - ++ offset_in_page(iomap->inline_data))) ++ return -EIO; ++ if (WARN_ON_ONCE(size > iomap->length)) ++ return -EIO; ++ if (WARN_ON_ONCE(page_has_private(page))) ++ return -EIO; + + addr = kmap_atomic(page); + memcpy(addr, iomap->inline_data, size); + memset(addr + size, 0, PAGE_SIZE - size); + kunmap_atomic(addr); + SetPageUptodate(page); ++ return 0; + } + + static inline bool iomap_block_needs_zeroing(struct inode *inode, +@@ -246,8 +253,10 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, + sector_t sector; + + if (iomap->type == IOMAP_INLINE) { +- WARN_ON_ONCE(pos); +- iomap_read_inline_data(inode, page, iomap); ++ int ret = 
iomap_read_inline_data(inode, page, iomap); ++ ++ if (ret) ++ return ret; + return PAGE_SIZE; + } + +@@ -581,6 +590,15 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags, + return 0; + } + ++static int iomap_write_begin_inline(struct inode *inode, ++ struct page *page, struct iomap *srcmap) ++{ ++ /* needs more work for the tailpacking case; disable for now */ ++ if (WARN_ON_ONCE(srcmap->offset != 0)) ++ return -EIO; ++ return iomap_read_inline_data(inode, page, srcmap); ++} ++ + static int + iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, + struct page **pagep, struct iomap *iomap, struct iomap *srcmap) +@@ -610,7 +628,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, + } + + if (srcmap->type == IOMAP_INLINE) +- iomap_read_inline_data(inode, page, srcmap); ++ status = iomap_write_begin_inline(inode, page, srcmap); + else if (iomap->flags & IOMAP_F_BUFFER_HEAD) + status = __block_write_begin_int(page, pos, len, NULL, srcmap); + else +@@ -663,11 +681,11 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page, + void *addr; + + WARN_ON_ONCE(!PageUptodate(page)); +- BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data)); ++ BUG_ON(!iomap_inline_data_valid(iomap)); + + flush_dcache_page(page); + addr = kmap_atomic(page); +- memcpy(iomap->inline_data + pos, addr + pos, copied); ++ memcpy(iomap_inline_data(iomap, pos), addr + pos, copied); + kunmap_atomic(addr); + + mark_inode_dirty(inode); +diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c +index 9398b8c31323..41ccbfc9dc82 100644 +--- a/fs/iomap/direct-io.c ++++ b/fs/iomap/direct-io.c +@@ -378,23 +378,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length, + struct iomap_dio *dio, struct iomap *iomap) + { + struct iov_iter *iter = dio->submit.iter; ++ void *inline_data = iomap_inline_data(iomap, pos); + size_t copied; + +- BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data)); ++ if (WARN_ON_ONCE(!iomap_inline_data_valid(iomap))) ++ return -EIO; + + if (dio->flags & IOMAP_DIO_WRITE) { + loff_t size = inode->i_size; + + if (pos > size) +- memset(iomap->inline_data + size, 0, pos - size); +- copied = copy_from_iter(iomap->inline_data + pos, length, iter); ++ memset(iomap_inline_data(iomap, size), 0, pos - size); ++ copied = copy_from_iter(inline_data, length, iter); + if (copied) { + if (pos + copied > size) + i_size_write(inode, pos + copied); + mark_inode_dirty(inode); + } + } else { +- copied = copy_to_iter(iomap->inline_data + pos, length, iter); ++ copied = copy_to_iter(inline_data, length, iter); + } + dio->size += copied; + return copied; +diff --git a/include/linux/iomap.h b/include/linux/iomap.h +index 479c1da3e221..b8ec145b2975 100644 +--- a/include/linux/iomap.h ++++ b/include/linux/iomap.h +@@ -97,6 +97,24 @@ iomap_sector(struct iomap *iomap, loff_t pos) + return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT; + } + ++/* ++ * Returns the inline data pointer for logical offset @pos. ++ */ ++static inline void *iomap_inline_data(struct iomap *iomap, loff_t pos) ++{ ++ return iomap->inline_data + pos - iomap->offset; ++} ++ ++/* ++ * Check if the mapping's length is within the valid range for inline data. ++ * This is used to guard against accessing data beyond the page inline_data ++ * points at. 
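++ * In other words, iomap->length must fit between inline_data and the
++ * end of the page that inline_data points into.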
++ */ ++static inline bool iomap_inline_data_valid(struct iomap *iomap) ++{ ++ return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data); ++} ++ + /* + * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare + * and page_done will be called for each page written to. This only applies to +-- +2.35.3 + diff --git a/patches.suse/iomap-switch-__iomap_dio_rw-to-use-iomap_iter.patch b/patches.suse/iomap-switch-__iomap_dio_rw-to-use-iomap_iter.patch new file mode 100644 index 0000000..66907ce --- /dev/null +++ b/patches.suse/iomap-switch-__iomap_dio_rw-to-use-iomap_iter.patch @@ -0,0 +1,416 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:10 -0700 +Subject: [PATCH] iomap: switch __iomap_dio_rw to use iomap_iter +Git-commit: a6d3d49587d10d23189675fce11b332a915081ff +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Switch __iomap_dio_rw to use iomap_iter. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Hannes Reinecke +--- + fs/btrfs/inode.c | 5 +- + fs/iomap/direct-io.c | 164 +++++++++++++++++++++--------------------- + include/linux/iomap.h | 4 +- + 3 files changed, 86 insertions(+), 87 deletions(-) + +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index 0117d867ecf8..3b0595e8bdd9 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -8194,9 +8194,10 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio, + return dip; + } + +-static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap, ++static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter, + struct bio *dio_bio, loff_t file_offset) + { ++ struct inode *inode = iter->inode; + const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + const bool raid56 = (btrfs_data_alloc_profile(fs_info) & +@@ -8212,7 +8213,7 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap, + int ret; + blk_status_t status; + struct btrfs_io_geometry geom; +- struct btrfs_dio_data *dio_data = iomap->private; ++ struct btrfs_dio_data *dio_data = iter->iomap.private; + struct extent_map *em = NULL; + + dip = btrfs_create_dio_private(dio_bio, inode, file_offset); +diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c +index 41ccbfc9dc82..4ecd255e0511 100644 +--- a/fs/iomap/direct-io.c ++++ b/fs/iomap/direct-io.c +@@ -1,7 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* + * Copyright (C) 2010 Red Hat, Inc. +- * Copyright (c) 2016-2018 Christoph Hellwig. ++ * Copyright (c) 2016-2021 Christoph Hellwig. 
+ */ + #include + #include +@@ -59,19 +59,17 @@ int iomap_dio_iopoll(struct kiocb *kiocb, bool spin) + } + EXPORT_SYMBOL_GPL(iomap_dio_iopoll); + +-static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap, +- struct bio *bio, loff_t pos) ++static void iomap_dio_submit_bio(const struct iomap_iter *iter, ++ struct iomap_dio *dio, struct bio *bio, loff_t pos) + { + atomic_inc(&dio->ref); + + if (dio->iocb->ki_flags & IOCB_HIPRI) + bio_set_polled(bio, dio->iocb); + +- dio->submit.last_queue = bdev_get_queue(iomap->bdev); ++ dio->submit.last_queue = bdev_get_queue(iter->iomap.bdev); + if (dio->dops && dio->dops->submit_io) +- dio->submit.cookie = dio->dops->submit_io( +- file_inode(dio->iocb->ki_filp), +- iomap, bio, pos); ++ dio->submit.cookie = dio->dops->submit_io(iter, bio, pos); + else + dio->submit.cookie = submit_bio(bio); + } +@@ -181,24 +179,23 @@ static void iomap_dio_bio_end_io(struct bio *bio) + } + } + +-static void +-iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos, +- unsigned len) ++static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio, ++ loff_t pos, unsigned len) + { + struct page *page = ZERO_PAGE(0); + int flags = REQ_SYNC | REQ_IDLE; + struct bio *bio; + + bio = bio_alloc(GFP_KERNEL, 1); +- bio_set_dev(bio, iomap->bdev); +- bio->bi_iter.bi_sector = iomap_sector(iomap, pos); ++ bio_set_dev(bio, iter->iomap.bdev); ++ bio->bi_iter.bi_sector = iomap_sector(&iter->iomap, pos); + bio->bi_private = dio; + bio->bi_end_io = iomap_dio_bio_end_io; + + get_page(page); + __bio_add_page(bio, page, len, 0); + bio_set_op_attrs(bio, REQ_OP_WRITE, flags); +- iomap_dio_submit_bio(dio, iomap, bio, pos); ++ iomap_dio_submit_bio(iter, dio, bio, pos); + } + + /* +@@ -206,8 +203,8 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos, + * mapping, and whether or not we want FUA. Note that we can end up + * clearing the WRITE_FUA flag in the dio request. 
+ */ +-static inline unsigned int +-iomap_dio_bio_opflags(struct iomap_dio *dio, struct iomap *iomap, bool use_fua) ++static inline unsigned int iomap_dio_bio_opflags(struct iomap_dio *dio, ++ const struct iomap *iomap, bool use_fua) + { + unsigned int opflags = REQ_SYNC | REQ_IDLE; + +@@ -229,13 +226,16 @@ iomap_dio_bio_opflags(struct iomap_dio *dio, struct iomap *iomap, bool use_fua) + return opflags; + } + +-static loff_t +-iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, +- struct iomap_dio *dio, struct iomap *iomap) ++static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, ++ struct iomap_dio *dio) + { ++ const struct iomap *iomap = &iter->iomap; ++ struct inode *inode = iter->inode; + unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev)); + unsigned int fs_block_size = i_blocksize(inode), pad; + unsigned int align = iov_iter_alignment(dio->submit.iter); ++ loff_t length = iomap_length(iter); ++ loff_t pos = iter->pos; + unsigned int bio_opf; + struct bio *bio; + bool need_zeroout = false; +@@ -286,7 +286,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, + /* zero out from the start of the block to the write offset */ + pad = pos & (fs_block_size - 1); + if (pad) +- iomap_dio_zero(dio, iomap, pos - pad, pad); ++ iomap_dio_zero(iter, dio, pos - pad, pad); + } + + /* +@@ -339,7 +339,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, + + nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, + BIO_MAX_VECS); +- iomap_dio_submit_bio(dio, iomap, bio, pos); ++ iomap_dio_submit_bio(iter, dio, bio, pos); + pos += n; + } while (nr_pages); + +@@ -355,7 +355,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, + /* zero out from the end of the write to the end of the block */ + pad = pos & (fs_block_size - 1); + if (pad) +- iomap_dio_zero(dio, iomap, pos, fs_block_size - pad); ++ iomap_dio_zero(iter, dio, pos, fs_block_size - pad); + } + out: + /* Undo iter limitation to current extent */ +@@ -365,35 +365,38 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, + return ret; + } + +-static loff_t +-iomap_dio_hole_actor(loff_t length, struct iomap_dio *dio) ++static loff_t iomap_dio_hole_iter(const struct iomap_iter *iter, ++ struct iomap_dio *dio) + { +- length = iov_iter_zero(length, dio->submit.iter); ++ loff_t length = iov_iter_zero(iomap_length(iter), dio->submit.iter); ++ + dio->size += length; + return length; + } + +-static loff_t +-iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length, +- struct iomap_dio *dio, struct iomap *iomap) ++static loff_t iomap_dio_inline_iter(const struct iomap_iter *iomi, ++ struct iomap_dio *dio) + { ++ const struct iomap *iomap = &iomi->iomap; + struct iov_iter *iter = dio->submit.iter; +- void *inline_data = iomap_inline_data(iomap, pos); ++ void *inline_data = iomap_inline_data(iomap, iomi->pos); ++ loff_t length = iomap_length(iomi); ++ loff_t pos = iomi->pos; + size_t copied; + + if (WARN_ON_ONCE(!iomap_inline_data_valid(iomap))) + return -EIO; + + if (dio->flags & IOMAP_DIO_WRITE) { +- loff_t size = inode->i_size; ++ loff_t size = iomi->inode->i_size; + + if (pos > size) + memset(iomap_inline_data(iomap, size), 0, pos - size); + copied = copy_from_iter(inline_data, length, iter); + if (copied) { + if (pos + copied > size) +- i_size_write(inode, pos + copied); +- mark_inode_dirty(inode); ++ i_size_write(iomi->inode, pos + copied); ++ mark_inode_dirty(iomi->inode); + } + } else { + copied = copy_to_iter(inline_data, 
length, iter); +@@ -402,30 +405,27 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length, + return copied; + } + +-static loff_t +-iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, +- void *data, struct iomap *iomap, struct iomap *srcmap) ++static loff_t iomap_dio_iter(const struct iomap_iter *iter, ++ struct iomap_dio *dio) + { +- struct iomap_dio *dio = data; +- +- switch (iomap->type) { ++ switch (iter->iomap.type) { + case IOMAP_HOLE: + if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE)) + return -EIO; +- return iomap_dio_hole_actor(length, dio); ++ return iomap_dio_hole_iter(iter, dio); + case IOMAP_UNWRITTEN: + if (!(dio->flags & IOMAP_DIO_WRITE)) +- return iomap_dio_hole_actor(length, dio); +- return iomap_dio_bio_actor(inode, pos, length, dio, iomap); ++ return iomap_dio_hole_iter(iter, dio); ++ return iomap_dio_bio_iter(iter, dio); + case IOMAP_MAPPED: +- return iomap_dio_bio_actor(inode, pos, length, dio, iomap); ++ return iomap_dio_bio_iter(iter, dio); + case IOMAP_INLINE: +- return iomap_dio_inline_actor(inode, pos, length, dio, iomap); ++ return iomap_dio_inline_iter(iter, dio); + case IOMAP_DELALLOC: + /* + * DIO is not serialised against mmap() access at all, and so + * if the page_mkwrite occurs between the writeback and the +- * iomap_apply() call in the DIO path, then it will see the ++ * iomap_iter() call in the DIO path, then it will see the + * DELALLOC block that the page-mkwrite allocated. + */ + pr_warn_ratelimited("Direct I/O collision with buffered writes! File: %pD4 Comm: %.20s\n", +@@ -456,16 +456,19 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + { + struct address_space *mapping = iocb->ki_filp->f_mapping; + struct inode *inode = file_inode(iocb->ki_filp); +- size_t count = iov_iter_count(iter); +- loff_t pos = iocb->ki_pos; +- loff_t end = iocb->ki_pos + count - 1, ret = 0; ++ struct iomap_iter iomi = { ++ .inode = inode, ++ .pos = iocb->ki_pos, ++ .len = iov_iter_count(iter), ++ .flags = IOMAP_DIRECT, ++ }; ++ loff_t end = iomi.pos + iomi.len - 1, ret = 0; + bool wait_for_completion = + is_sync_kiocb(iocb) || (dio_flags & IOMAP_DIO_FORCE_WAIT); +- unsigned int iomap_flags = IOMAP_DIRECT; + struct blk_plug plug; + struct iomap_dio *dio; + +- if (!count) ++ if (!iomi.len) + return NULL; + + dio = kmalloc(sizeof(*dio), GFP_KERNEL); +@@ -486,29 +489,30 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + dio->submit.last_queue = NULL; + + if (iov_iter_rw(iter) == READ) { +- if (pos >= dio->i_size) ++ if (iomi.pos >= dio->i_size) + goto out_free_dio; + + if (iocb->ki_flags & IOCB_NOWAIT) { +- if (filemap_range_needs_writeback(mapping, pos, end)) { ++ if (filemap_range_needs_writeback(mapping, iomi.pos, ++ end)) { + ret = -EAGAIN; + goto out_free_dio; + } +- iomap_flags |= IOMAP_NOWAIT; ++ iomi.flags |= IOMAP_NOWAIT; + } + + if (iter_is_iovec(iter)) + dio->flags |= IOMAP_DIO_DIRTY; + } else { +- iomap_flags |= IOMAP_WRITE; ++ iomi.flags |= IOMAP_WRITE; + dio->flags |= IOMAP_DIO_WRITE; + + if (iocb->ki_flags & IOCB_NOWAIT) { +- if (filemap_range_has_page(mapping, pos, end)) { ++ if (filemap_range_has_page(mapping, iomi.pos, end)) { + ret = -EAGAIN; + goto out_free_dio; + } +- iomap_flags |= IOMAP_NOWAIT; ++ iomi.flags |= IOMAP_NOWAIT; + } + + /* for data sync or sync, we need sync completion processing */ +@@ -527,12 +531,13 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + + if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) { + ret = -EAGAIN; +- if (pos >= dio->i_size || pos + count > dio->i_size) ++ if 
(iomi.pos >= dio->i_size || ++ iomi.pos + iomi.len > dio->i_size) + goto out_free_dio; +- iomap_flags |= IOMAP_OVERWRITE_ONLY; ++ iomi.flags |= IOMAP_OVERWRITE_ONLY; + } + +- ret = filemap_write_and_wait_range(mapping, pos, end); ++ ret = filemap_write_and_wait_range(mapping, iomi.pos, end); + if (ret) + goto out_free_dio; + +@@ -542,9 +547,10 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + * If this invalidation fails, let the caller fall back to + * buffered I/O. + */ +- if (invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, +- end >> PAGE_SHIFT)) { +- trace_iomap_dio_invalidate_fail(inode, pos, count); ++ if (invalidate_inode_pages2_range(mapping, ++ iomi.pos >> PAGE_SHIFT, end >> PAGE_SHIFT)) { ++ trace_iomap_dio_invalidate_fail(inode, iomi.pos, ++ iomi.len); + ret = -ENOTBLK; + goto out_free_dio; + } +@@ -559,31 +565,23 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, + inode_dio_begin(inode); + + blk_start_plug(&plug); +- do { +- ret = iomap_apply(inode, pos, count, iomap_flags, ops, dio, +- iomap_dio_actor); +- if (ret <= 0) { +- /* magic error code to fall back to buffered I/O */ +- if (ret == -ENOTBLK) { +- wait_for_completion = true; +- ret = 0; +- } +- break; +- } +- pos += ret; +- +- if (iov_iter_rw(iter) == READ && pos >= dio->i_size) { +- /* +- * We only report that we've read data up to i_size. +- * Revert iter to a state corresponding to that as +- * some callers (such as splice code) rely on it. +- */ +- iov_iter_revert(iter, pos - dio->i_size); +- break; +- } +- } while ((count = iov_iter_count(iter)) > 0); ++ while ((ret = iomap_iter(&iomi, ops)) > 0) ++ iomi.processed = iomap_dio_iter(&iomi, dio); + blk_finish_plug(&plug); + ++ /* ++ * We only report that we've read data up to i_size. ++ * Revert iter to a state corresponding to that as some callers (such ++ * as the splice code) rely on it. ++ */ ++ if (iov_iter_rw(iter) == READ && iomi.pos >= dio->i_size) ++ iov_iter_revert(iter, iomi.pos - dio->i_size); ++ ++ /* magic error code to fall back to buffered I/O */ ++ if (ret == -ENOTBLK) { ++ wait_for_completion = true; ++ ret = 0; ++ } + if (ret < 0) + iomap_dio_set_error(dio, ret); + +diff --git a/include/linux/iomap.h b/include/linux/iomap.h +index aac4176ea164..66e04aedd2ca 100644 +--- a/include/linux/iomap.h ++++ b/include/linux/iomap.h +@@ -322,8 +322,8 @@ int iomap_writepages(struct address_space *mapping, + struct iomap_dio_ops { + int (*end_io)(struct kiocb *iocb, ssize_t size, int error, + unsigned flags); +- blk_qc_t (*submit_io)(struct inode *inode, struct iomap *iomap, +- struct bio *bio, loff_t file_offset); ++ blk_qc_t (*submit_io)(const struct iomap_iter *iter, struct bio *bio, ++ loff_t file_offset); + }; + + /* +-- +2.35.3 + diff --git a/patches.suse/iomap-switch-iomap_bmap-to-use-iomap_iter.patch b/patches.suse/iomap-switch-iomap_bmap-to-use-iomap_iter.patch new file mode 100644 index 0000000..ff3e3d8 --- /dev/null +++ b/patches.suse/iomap-switch-iomap_bmap-to-use-iomap_iter.patch @@ -0,0 +1,77 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:11 -0700 +Subject: [PATCH] iomap: switch iomap_bmap to use iomap_iter +Git-commit: 6d8a1287a48909dbf542470aa2ca1ef7ceab3fc1 +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Rewrite the ->bmap implementation based on iomap_iter. + +Signed-off-by: Christoph Hellwig +[djwong: restructure the loop to make its behavior a little clearer] +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. 
Wong +Reviewed-by: Dave Chinner +Acked-by: Hannes Reinecke +--- + fs/iomap/fiemap.c | 31 +++++++++++++------------------ + 1 file changed, 13 insertions(+), 18 deletions(-) + +diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c +index acad09a8c188..66cf267c68ae 100644 +--- a/fs/iomap/fiemap.c ++++ b/fs/iomap/fiemap.c +@@ -92,37 +92,32 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi, + } + EXPORT_SYMBOL_GPL(iomap_fiemap); + +-static loff_t +-iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length, +- void *data, struct iomap *iomap, struct iomap *srcmap) +-{ +- sector_t *bno = data, addr; +- +- if (iomap->type == IOMAP_MAPPED) { +- addr = (pos - iomap->offset + iomap->addr) >> inode->i_blkbits; +- *bno = addr; +- } +- return 0; +-} +- + /* legacy ->bmap interface. 0 is the error return (!) */ + sector_t + iomap_bmap(struct address_space *mapping, sector_t bno, + const struct iomap_ops *ops) + { +- struct inode *inode = mapping->host; +- loff_t pos = bno << inode->i_blkbits; +- unsigned blocksize = i_blocksize(inode); ++ struct iomap_iter iter = { ++ .inode = mapping->host, ++ .pos = (loff_t)bno << mapping->host->i_blkbits, ++ .len = i_blocksize(mapping->host), ++ .flags = IOMAP_REPORT, ++ }; ++ const unsigned int blkshift = mapping->host->i_blkbits - SECTOR_SHIFT; + int ret; + + if (filemap_write_and_wait(mapping)) + return 0; + + bno = 0; +- ret = iomap_apply(inode, pos, blocksize, 0, ops, &bno, +- iomap_bmap_actor); ++ while ((ret = iomap_iter(&iter, ops)) > 0) { ++ if (iter.iomap.type == IOMAP_MAPPED) ++ bno = iomap_sector(&iter.iomap, iter.pos) >> blkshift; ++ /* leave iter.processed unset to abort loop */ ++ } + if (ret) + return 0; ++ + return bno; + } + EXPORT_SYMBOL_GPL(iomap_bmap); +-- +2.35.3 + diff --git a/patches.suse/iomap-switch-iomap_fiemap-to-use-iomap_iter.patch b/patches.suse/iomap-switch-iomap_fiemap-to-use-iomap_iter.patch new file mode 100644 index 0000000..2c3477f --- /dev/null +++ b/patches.suse/iomap-switch-iomap_fiemap-to-use-iomap_iter.patch @@ -0,0 +1,138 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:10 -0700 +Subject: [PATCH] iomap: switch iomap_fiemap to use iomap_iter +Git-commit: 7892386d35715d14c469ec98b6deab037e2e2232 +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Rewrite the ->fiemap implementation based on iomap_iter. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/fiemap.c | 70 ++++++++++++++++++++--------------------------- + 1 file changed, 29 insertions(+), 41 deletions(-) + +diff --git a/fs/iomap/fiemap.c b/fs/iomap/fiemap.c +index aab070df4a21..acad09a8c188 100644 +--- a/fs/iomap/fiemap.c ++++ b/fs/iomap/fiemap.c +@@ -1,6 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0 + /* +- * Copyright (c) 2016-2018 Christoph Hellwig. ++ * Copyright (c) 2016-2021 Christoph Hellwig. 
+ */ + #include + #include +@@ -8,13 +8,8 @@ + #include + #include + +-struct fiemap_ctx { +- struct fiemap_extent_info *fi; +- struct iomap prev; +-}; +- + static int iomap_to_fiemap(struct fiemap_extent_info *fi, +- struct iomap *iomap, u32 flags) ++ const struct iomap *iomap, u32 flags) + { + switch (iomap->type) { + case IOMAP_HOLE: +@@ -43,24 +38,22 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi, + iomap->length, flags); + } + +-static loff_t +-iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, +- struct iomap *iomap, struct iomap *srcmap) ++static loff_t iomap_fiemap_iter(const struct iomap_iter *iter, ++ struct fiemap_extent_info *fi, struct iomap *prev) + { +- struct fiemap_ctx *ctx = data; +- loff_t ret = length; ++ int ret; + +- if (iomap->type == IOMAP_HOLE) +- return length; ++ if (iter->iomap.type == IOMAP_HOLE) ++ return iomap_length(iter); + +- ret = iomap_to_fiemap(ctx->fi, &ctx->prev, 0); +- ctx->prev = *iomap; ++ ret = iomap_to_fiemap(fi, prev, 0); ++ *prev = iter->iomap; + switch (ret) { + case 0: /* success */ +- return length; ++ return iomap_length(iter); + case 1: /* extent array full */ + return 0; +- default: ++ default: /* error */ + return ret; + } + } +@@ -68,38 +61,33 @@ iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, + int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi, + u64 start, u64 len, const struct iomap_ops *ops) + { +- struct fiemap_ctx ctx; +- loff_t ret; +- +- memset(&ctx, 0, sizeof(ctx)); +- ctx.fi = fi; +- ctx.prev.type = IOMAP_HOLE; ++ struct iomap_iter iter = { ++ .inode = inode, ++ .pos = start, ++ .len = len, ++ .flags = IOMAP_REPORT, ++ }; ++ struct iomap prev = { ++ .type = IOMAP_HOLE, ++ }; ++ int ret; + +- ret = fiemap_prep(inode, fi, start, &len, 0); ++ ret = fiemap_prep(inode, fi, start, &iter.len, 0); + if (ret) + return ret; + +- while (len > 0) { +- ret = iomap_apply(inode, start, len, IOMAP_REPORT, ops, &ctx, +- iomap_fiemap_actor); +- /* inode with no (attribute) mapping will give ENOENT */ +- if (ret == -ENOENT) +- break; +- if (ret < 0) +- return ret; +- if (ret == 0) +- break; ++ while ((ret = iomap_iter(&iter, ops)) > 0) ++ iter.processed = iomap_fiemap_iter(&iter, fi, &prev); + +- start += ret; +- len -= ret; +- } +- +- if (ctx.prev.type != IOMAP_HOLE) { +- ret = iomap_to_fiemap(fi, &ctx.prev, FIEMAP_EXTENT_LAST); ++ if (prev.type != IOMAP_HOLE) { ++ ret = iomap_to_fiemap(fi, &prev, FIEMAP_EXTENT_LAST); + if (ret < 0) + return ret; + } + ++ /* inode with no (attribute) mapping will give ENOENT */ ++ if (ret < 0 && ret != -ENOENT) ++ return ret; + return 0; + } + EXPORT_SYMBOL_GPL(iomap_fiemap); +-- +2.35.3 + diff --git a/patches.suse/iomap-switch-iomap_file_buffered_write-to-use-iomap_.patch b/patches.suse/iomap-switch-iomap_file_buffered_write-to-use-iomap_.patch new file mode 100644 index 0000000..b968447 --- /dev/null +++ b/patches.suse/iomap-switch-iomap_file_buffered_write-to-use-iomap_.patch @@ -0,0 +1,112 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:08 -0700 +Subject: [PATCH] iomap: switch iomap_file_buffered_write to use iomap_iter +Git-commit: ce83a0251c6ec2152f3449484d22e87f467c4a66 +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Switch iomap_file_buffered_write to use iomap_iter. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. 
Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/buffered-io.c | 49 +++++++++++++++++++++--------------------- + 1 file changed, 25 insertions(+), 24 deletions(-) + +diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c +index 32d80350bb55..a151b3b49038 100644 +--- a/fs/iomap/buffered-io.c ++++ b/fs/iomap/buffered-io.c +@@ -726,13 +726,14 @@ static size_t iomap_write_end(struct inode *inode, loff_t pos, size_t len, + return ret; + } + +-static loff_t +-iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, +- struct iomap *iomap, struct iomap *srcmap) ++static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) + { +- struct iov_iter *i = data; +- long status = 0; ++ struct iomap *srcmap = iomap_iter_srcmap(iter); ++ struct iomap *iomap = &iter->iomap; ++ loff_t length = iomap_length(iter); ++ loff_t pos = iter->pos; + ssize_t written = 0; ++ long status = 0; + + do { + struct page *page; +@@ -758,18 +759,18 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, + break; + } + +- status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, +- srcmap); ++ status = iomap_write_begin(iter->inode, pos, bytes, 0, &page, ++ iomap, srcmap); + if (unlikely(status)) + break; + +- if (mapping_writably_mapped(inode->i_mapping)) ++ if (mapping_writably_mapped(iter->inode->i_mapping)) + flush_dcache_page(page); + + copied = copy_page_from_iter_atomic(page, offset, bytes, i); + +- status = iomap_write_end(inode, pos, bytes, copied, page, iomap, +- srcmap); ++ status = iomap_write_end(iter->inode, pos, bytes, copied, page, ++ iomap, srcmap); + + if (unlikely(copied != status)) + iov_iter_revert(i, copied - status); +@@ -790,29 +791,29 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, + written += status; + length -= status; + +- balance_dirty_pages_ratelimited(inode->i_mapping); ++ balance_dirty_pages_ratelimited(iter->inode->i_mapping); + } while (iov_iter_count(i) && length); + + return written ? written : status; + } + + ssize_t +-iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter, ++iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, + const struct iomap_ops *ops) + { +- struct inode *inode = iocb->ki_filp->f_mapping->host; +- loff_t pos = iocb->ki_pos, ret = 0, written = 0; +- +- while (iov_iter_count(iter)) { +- ret = iomap_apply(inode, pos, iov_iter_count(iter), +- IOMAP_WRITE, ops, iter, iomap_write_actor); +- if (ret <= 0) +- break; +- pos += ret; +- written += ret; +- } ++ struct iomap_iter iter = { ++ .inode = iocb->ki_filp->f_mapping->host, ++ .pos = iocb->ki_pos, ++ .len = iov_iter_count(i), ++ .flags = IOMAP_WRITE, ++ }; ++ int ret; + +- return written ? 
written : ret; ++ while ((ret = iomap_iter(&iter, ops)) > 0) ++ iter.processed = iomap_write_iter(&iter, i); ++ if (iter.pos == iocb->ki_pos) ++ return ret; ++ return iter.pos - iocb->ki_pos; + } + EXPORT_SYMBOL_GPL(iomap_file_buffered_write); + +-- +2.35.3 + diff --git a/patches.suse/iomap-switch-iomap_file_unshare-to-use-iomap_iter.patch b/patches.suse/iomap-switch-iomap_file_unshare-to-use-iomap_iter.patch new file mode 100644 index 0000000..bfa9748 --- /dev/null +++ b/patches.suse/iomap-switch-iomap_file_unshare-to-use-iomap_iter.patch @@ -0,0 +1,93 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:09 -0700 +Subject: [PATCH] iomap: switch iomap_file_unshare to use iomap_iter +Git-commit: 8fc274d1f4b4fe629da3b84b6e5a7ef08a91df49 +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Switch iomap_file_unshare to use iomap_iter. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/buffered-io.c | 35 ++++++++++++++++++----------------- + 1 file changed, 18 insertions(+), 17 deletions(-) + +diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c +index a151b3b49038..1a334cd0a36d 100644 +--- a/fs/iomap/buffered-io.c ++++ b/fs/iomap/buffered-io.c +@@ -817,10 +817,12 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, + } + EXPORT_SYMBOL_GPL(iomap_file_buffered_write); + +-static loff_t +-iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data, +- struct iomap *iomap, struct iomap *srcmap) ++static loff_t iomap_unshare_iter(struct iomap_iter *iter) + { ++ struct iomap *iomap = &iter->iomap; ++ struct iomap *srcmap = iomap_iter_srcmap(iter); ++ loff_t pos = iter->pos; ++ loff_t length = iomap_length(iter); + long status = 0; + loff_t written = 0; + +@@ -836,12 +838,12 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data, + unsigned long bytes = min_t(loff_t, PAGE_SIZE - offset, length); + struct page *page; + +- status = iomap_write_begin(inode, pos, bytes, ++ status = iomap_write_begin(iter->inode, pos, bytes, + IOMAP_WRITE_F_UNSHARE, &page, iomap, srcmap); + if (unlikely(status)) + return status; + +- status = iomap_write_end(inode, pos, bytes, bytes, page, iomap, ++ status = iomap_write_end(iter->inode, pos, bytes, bytes, page, iomap, + srcmap); + if (WARN_ON_ONCE(status == 0)) + return -EIO; +@@ -852,7 +854,7 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data, + written += status; + length -= status; + +- balance_dirty_pages_ratelimited(inode->i_mapping); ++ balance_dirty_pages_ratelimited(iter->inode->i_mapping); + } while (length); + + return written; +@@ -862,18 +864,17 @@ int + iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, + const struct iomap_ops *ops) + { +- loff_t ret; +- +- while (len) { +- ret = iomap_apply(inode, pos, len, IOMAP_WRITE, ops, NULL, +- iomap_unshare_actor); +- if (ret <= 0) +- return ret; +- pos += ret; +- len -= ret; +- } ++ struct iomap_iter iter = { ++ .inode = inode, ++ .pos = pos, ++ .len = len, ++ .flags = IOMAP_WRITE, ++ }; ++ int ret; + +- return 0; ++ while ((ret = iomap_iter(&iter, ops)) > 0) ++ iter.processed = iomap_unshare_iter(&iter); ++ return ret; + } + EXPORT_SYMBOL_GPL(iomap_file_unshare); + +-- +2.35.3 + diff --git a/patches.suse/iomap-switch-iomap_page_mkwrite-to-use-iomap_iter.patch b/patches.suse/iomap-switch-iomap_page_mkwrite-to-use-iomap_iter.patch new file mode 100644 index 0000000..9ec2231 --- /dev/null +++ 
b/patches.suse/iomap-switch-iomap_page_mkwrite-to-use-iomap_iter.patch @@ -0,0 +1,87 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:09 -0700 +Subject: [PATCH] iomap: switch iomap_page_mkwrite to use iomap_iter +Git-commit: 253564bafff31382b412839b0e1bb44c19c51172 +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Switch iomap_page_mkwrite to use iomap_iter. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/buffered-io.c | 39 +++++++++++++++++---------------------- + 1 file changed, 17 insertions(+), 22 deletions(-) + +diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c +index c6b86eb686f7..59db1e30a666 100644 +--- a/fs/iomap/buffered-io.c ++++ b/fs/iomap/buffered-io.c +@@ -961,15 +961,15 @@ iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, + } + EXPORT_SYMBOL_GPL(iomap_truncate_page); + +-static loff_t +-iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, +- void *data, struct iomap *iomap, struct iomap *srcmap) ++static loff_t iomap_page_mkwrite_iter(struct iomap_iter *iter, ++ struct page *page) + { +- struct page *page = data; ++ loff_t length = iomap_length(iter); + int ret; + +- if (iomap->flags & IOMAP_F_BUFFER_HEAD) { +- ret = __block_write_begin_int(page, pos, length, NULL, iomap); ++ if (iter->iomap.flags & IOMAP_F_BUFFER_HEAD) { ++ ret = __block_write_begin_int(page, iter->pos, length, NULL, ++ &iter->iomap); + if (ret) + return ret; + block_commit_write(page, 0, length); +@@ -983,29 +983,24 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, + + vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops) + { ++ struct iomap_iter iter = { ++ .inode = file_inode(vmf->vma->vm_file), ++ .flags = IOMAP_WRITE | IOMAP_FAULT, ++ }; + struct page *page = vmf->page; +- struct inode *inode = file_inode(vmf->vma->vm_file); +- unsigned long length; +- loff_t offset; + ssize_t ret; + + lock_page(page); +- ret = page_mkwrite_check_truncate(page, inode); ++ ret = page_mkwrite_check_truncate(page, iter.inode); + if (ret < 0) + goto out_unlock; +- length = ret; +- +- offset = page_offset(page); +- while (length > 0) { +- ret = iomap_apply(inode, offset, length, +- IOMAP_WRITE | IOMAP_FAULT, ops, page, +- iomap_page_mkwrite_actor); +- if (unlikely(ret <= 0)) +- goto out_unlock; +- offset += ret; +- length -= ret; +- } ++ iter.pos = page_offset(page); ++ iter.len = ret; ++ while ((ret = iomap_iter(&iter, ops)) > 0) ++ iter.processed = iomap_page_mkwrite_iter(&iter, page); + ++ if (ret < 0) ++ goto out_unlock; + wait_for_stable_page(page); + return VM_FAULT_LOCKED; + out_unlock: +-- +2.35.3 + diff --git a/patches.suse/iomap-switch-iomap_seek_data-to-use-iomap_iter.patch b/patches.suse/iomap-switch-iomap_seek_data-to-use-iomap_iter.patch new file mode 100644 index 0000000..8aa5221 --- /dev/null +++ b/patches.suse/iomap-switch-iomap_seek_data-to-use-iomap_iter.patch @@ -0,0 +1,96 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:12 -0700 +Subject: [PATCH] iomap: switch iomap_seek_data to use iomap_iter +Git-commit: c4740bf1edad559c10b1d33c72e885b920bf6029 +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Rewrite iomap_seek_data to use iomap_iter. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. 
Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/seek.c | 47 ++++++++++++++++++++++++----------------------- + 1 file changed, 24 insertions(+), 23 deletions(-) + +diff --git a/fs/iomap/seek.c b/fs/iomap/seek.c +index fed8f9005f9e..a845c012b50c 100644 +--- a/fs/iomap/seek.c ++++ b/fs/iomap/seek.c +@@ -56,47 +56,48 @@ iomap_seek_hole(struct inode *inode, loff_t pos, const struct iomap_ops *ops) + } + EXPORT_SYMBOL_GPL(iomap_seek_hole); + +-static loff_t +-iomap_seek_data_actor(struct inode *inode, loff_t start, loff_t length, +- void *data, struct iomap *iomap, struct iomap *srcmap) ++static loff_t iomap_seek_data_iter(const struct iomap_iter *iter, ++ loff_t *hole_pos) + { +- loff_t offset = start; ++ loff_t length = iomap_length(iter); + +- switch (iomap->type) { ++ switch (iter->iomap.type) { + case IOMAP_HOLE: + return length; + case IOMAP_UNWRITTEN: +- offset = mapping_seek_hole_data(inode->i_mapping, start, +- start + length, SEEK_DATA); +- if (offset < 0) ++ *hole_pos = mapping_seek_hole_data(iter->inode->i_mapping, ++ iter->pos, iter->pos + length, SEEK_DATA); ++ if (*hole_pos < 0) + return length; +- fallthrough; ++ return 0; + default: +- *(loff_t *)data = offset; ++ *hole_pos = iter->pos; + return 0; + } + } + + loff_t +-iomap_seek_data(struct inode *inode, loff_t offset, const struct iomap_ops *ops) ++iomap_seek_data(struct inode *inode, loff_t pos, const struct iomap_ops *ops) + { + loff_t size = i_size_read(inode); +- loff_t ret; ++ struct iomap_iter iter = { ++ .inode = inode, ++ .pos = pos, ++ .flags = IOMAP_REPORT, ++ }; ++ int ret; + + /* Nothing to be found before or beyond the end of the file. */ +- if (offset < 0 || offset >= size) ++ if (pos < 0 || pos >= size) + return -ENXIO; + +- while (offset < size) { +- ret = iomap_apply(inode, offset, size - offset, IOMAP_REPORT, +- ops, &offset, iomap_seek_data_actor); +- if (ret < 0) +- return ret; +- if (ret == 0) +- return offset; +- offset += ret; +- } +- ++ iter.len = size - pos; ++ while ((ret = iomap_iter(&iter, ops)) > 0) ++ iter.processed = iomap_seek_data_iter(&iter, &pos); ++ if (ret < 0) ++ return ret; ++ if (iter.len) /* found data before EOF */ ++ return pos; + /* We've reached the end of the file without finding data */ + return -ENXIO; + } +-- +2.35.3 + diff --git a/patches.suse/iomap-switch-iomap_seek_hole-to-use-iomap_iter.patch b/patches.suse/iomap-switch-iomap_seek_hole-to-use-iomap_iter.patch new file mode 100644 index 0000000..78ad51e --- /dev/null +++ b/patches.suse/iomap-switch-iomap_seek_hole-to-use-iomap_iter.patch @@ -0,0 +1,107 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:11 -0700 +Subject: [PATCH] iomap: switch iomap_seek_hole to use iomap_iter +Git-commit: 40670d18e878160a170ba135c5d077471d7a9998 +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Rewrite iomap_seek_hole to use iomap_iter. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/seek.c | 51 +++++++++++++++++++++++++------------------------ + 1 file changed, 26 insertions(+), 25 deletions(-) + +diff --git a/fs/iomap/seek.c b/fs/iomap/seek.c +index ce6fb810854f..fed8f9005f9e 100644 +--- a/fs/iomap/seek.c ++++ b/fs/iomap/seek.c +@@ -1,7 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* + * Copyright (C) 2017 Red Hat, Inc. +- * Copyright (c) 2018 Christoph Hellwig. ++ * Copyright (c) 2018-2021 Christoph Hellwig. 
+ */ + #include + #include +@@ -10,21 +10,20 @@ + #include + #include + +-static loff_t +-iomap_seek_hole_actor(struct inode *inode, loff_t start, loff_t length, +- void *data, struct iomap *iomap, struct iomap *srcmap) ++static loff_t iomap_seek_hole_iter(const struct iomap_iter *iter, ++ loff_t *hole_pos) + { +- loff_t offset = start; ++ loff_t length = iomap_length(iter); + +- switch (iomap->type) { ++ switch (iter->iomap.type) { + case IOMAP_UNWRITTEN: +- offset = mapping_seek_hole_data(inode->i_mapping, start, +- start + length, SEEK_HOLE); +- if (offset == start + length) ++ *hole_pos = mapping_seek_hole_data(iter->inode->i_mapping, ++ iter->pos, iter->pos + length, SEEK_HOLE); ++ if (*hole_pos == iter->pos + length) + return length; +- fallthrough; ++ return 0; + case IOMAP_HOLE: +- *(loff_t *)data = offset; ++ *hole_pos = iter->pos; + return 0; + default: + return length; +@@ -32,26 +31,28 @@ iomap_seek_hole_actor(struct inode *inode, loff_t start, loff_t length, + } + + loff_t +-iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops) ++iomap_seek_hole(struct inode *inode, loff_t pos, const struct iomap_ops *ops) + { + loff_t size = i_size_read(inode); +- loff_t ret; ++ struct iomap_iter iter = { ++ .inode = inode, ++ .pos = pos, ++ .flags = IOMAP_REPORT, ++ }; ++ int ret; + + /* Nothing to be found before or beyond the end of the file. */ +- if (offset < 0 || offset >= size) ++ if (pos < 0 || pos >= size) + return -ENXIO; + +- while (offset < size) { +- ret = iomap_apply(inode, offset, size - offset, IOMAP_REPORT, +- ops, &offset, iomap_seek_hole_actor); +- if (ret < 0) +- return ret; +- if (ret == 0) +- break; +- offset += ret; +- } +- +- return offset; ++ iter.len = size - pos; ++ while ((ret = iomap_iter(&iter, ops)) > 0) ++ iter.processed = iomap_seek_hole_iter(&iter, &pos); ++ if (ret < 0) ++ return ret; ++ if (iter.len) /* found hole before EOF */ ++ return pos; ++ return size; + } + EXPORT_SYMBOL_GPL(iomap_seek_hole); + +-- +2.35.3 + diff --git a/patches.suse/iomap-switch-iomap_swapfile_activate-to-use-iomap_it.patch b/patches.suse/iomap-switch-iomap_swapfile_activate-to-use-iomap_it.patch new file mode 100644 index 0000000..00fd7e4 --- /dev/null +++ b/patches.suse/iomap-switch-iomap_swapfile_activate-to-use-iomap_it.patch @@ -0,0 +1,100 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:12 -0700 +Subject: [PATCH] iomap: switch iomap_swapfile_activate to use iomap_iter +Git-commit: 3d99a1ce3854a6cee3217247ab6b2cca3985a7a2 +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Switch iomap_swapfile_activate to use iomap_iter. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/swapfile.c | 38 ++++++++++++++++---------------------- + 1 file changed, 16 insertions(+), 22 deletions(-) + +diff --git a/fs/iomap/swapfile.c b/fs/iomap/swapfile.c +index 6250ca6a1f85..7069606eca85 100644 +--- a/fs/iomap/swapfile.c ++++ b/fs/iomap/swapfile.c +@@ -88,13 +88,9 @@ static int iomap_swapfile_fail(struct iomap_swapfile_info *isi, const char *str) + * swap only cares about contiguous page-aligned physical extents and makes no + * distinction between written and unwritten extents. 
+ */ +-static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos, +- loff_t count, void *data, struct iomap *iomap, +- struct iomap *srcmap) ++static loff_t iomap_swapfile_iter(const struct iomap_iter *iter, ++ struct iomap *iomap, struct iomap_swapfile_info *isi) + { +- struct iomap_swapfile_info *isi = data; +- int error; +- + switch (iomap->type) { + case IOMAP_MAPPED: + case IOMAP_UNWRITTEN: +@@ -125,12 +121,12 @@ static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos, + isi->iomap.length += iomap->length; + } else { + /* Otherwise, add the retained iomap and store this one. */ +- error = iomap_swapfile_add_extent(isi); ++ int error = iomap_swapfile_add_extent(isi); + if (error) + return error; + memcpy(&isi->iomap, iomap, sizeof(isi->iomap)); + } +- return count; ++ return iomap_length(iter); + } + + /* +@@ -141,16 +137,19 @@ int iomap_swapfile_activate(struct swap_info_struct *sis, + struct file *swap_file, sector_t *pagespan, + const struct iomap_ops *ops) + { ++ struct inode *inode = swap_file->f_mapping->host; ++ struct iomap_iter iter = { ++ .inode = inode, ++ .pos = 0, ++ .len = ALIGN_DOWN(i_size_read(inode), PAGE_SIZE), ++ .flags = IOMAP_REPORT, ++ }; + struct iomap_swapfile_info isi = { + .sis = sis, + .lowest_ppage = (sector_t)-1ULL, + .file = swap_file, + }; +- struct address_space *mapping = swap_file->f_mapping; +- struct inode *inode = mapping->host; +- loff_t pos = 0; +- loff_t len = ALIGN_DOWN(i_size_read(inode), PAGE_SIZE); +- loff_t ret; ++ int ret; + + /* + * Persist all file mapping metadata so that we won't have any +@@ -160,15 +159,10 @@ int iomap_swapfile_activate(struct swap_info_struct *sis, + if (ret) + return ret; + +- while (len > 0) { +- ret = iomap_apply(inode, pos, len, IOMAP_REPORT, +- ops, &isi, iomap_swapfile_activate_actor); +- if (ret <= 0) +- return ret; +- +- pos += ret; +- len -= ret; +- } ++ while ((ret = iomap_iter(&iter, ops)) > 0) ++ iter.processed = iomap_swapfile_iter(&iter, &iter.iomap, &isi); ++ if (ret < 0) ++ return ret; + + if (isi.iomap.length) { + ret = iomap_swapfile_add_extent(&isi); +-- +2.35.3 + diff --git a/patches.suse/iomap-switch-iomap_zero_range-to-use-iomap_iter.patch b/patches.suse/iomap-switch-iomap_zero_range-to-use-iomap_iter.patch new file mode 100644 index 0000000..b5bc6fd --- /dev/null +++ b/patches.suse/iomap-switch-iomap_zero_range-to-use-iomap_iter.patch @@ -0,0 +1,85 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:09 -0700 +Subject: [PATCH] iomap: switch iomap_zero_range to use iomap_iter +Git-commit: 2aa3048e03d38d5358be2553d4b638c1a018498c +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Switch iomap_zero_range to use iomap_iter. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. 
Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/buffered-io.c | 36 ++++++++++++++++++------------------ + 1 file changed, 18 insertions(+), 18 deletions(-) + +diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c +index 1a334cd0a36d..c6b86eb686f7 100644 +--- a/fs/iomap/buffered-io.c ++++ b/fs/iomap/buffered-io.c +@@ -896,11 +896,12 @@ static s64 iomap_zero(struct inode *inode, loff_t pos, u64 length, + return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap); + } + +-static loff_t iomap_zero_range_actor(struct inode *inode, loff_t pos, +- loff_t length, void *data, struct iomap *iomap, +- struct iomap *srcmap) ++static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) + { +- bool *did_zero = data; ++ struct iomap *iomap = &iter->iomap; ++ struct iomap *srcmap = iomap_iter_srcmap(iter); ++ loff_t pos = iter->pos; ++ loff_t length = iomap_length(iter); + loff_t written = 0; + + /* already zeroed? we're done. */ +@@ -910,10 +911,11 @@ static loff_t iomap_zero_range_actor(struct inode *inode, loff_t pos, + do { + s64 bytes; + +- if (IS_DAX(inode)) ++ if (IS_DAX(iter->inode)) + bytes = dax_iomap_zero(pos, length, iomap); + else +- bytes = iomap_zero(inode, pos, length, iomap, srcmap); ++ bytes = iomap_zero(iter->inode, pos, length, iomap, ++ srcmap); + if (bytes < 0) + return bytes; + +@@ -931,19 +933,17 @@ int + iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, + const struct iomap_ops *ops) + { +- loff_t ret; +- +- while (len > 0) { +- ret = iomap_apply(inode, pos, len, IOMAP_ZERO, +- ops, did_zero, iomap_zero_range_actor); +- if (ret <= 0) +- return ret; +- +- pos += ret; +- len -= ret; +- } ++ struct iomap_iter iter = { ++ .inode = inode, ++ .pos = pos, ++ .len = len, ++ .flags = IOMAP_ZERO, ++ }; ++ int ret; + +- return 0; ++ while ((ret = iomap_iter(&iter, ops)) > 0) ++ iter.processed = iomap_zero_iter(&iter, did_zero); ++ return ret; + } + EXPORT_SYMBOL_GPL(iomap_zero_range); + +-- +2.35.3 + diff --git a/patches.suse/iomap-switch-readahead-and-readpage-to-use-iomap_ite.patch b/patches.suse/iomap-switch-readahead-and-readpage-to-use-iomap_ite.patch new file mode 100644 index 0000000..cff8b66 --- /dev/null +++ b/patches.suse/iomap-switch-readahead-and-readpage-to-use-iomap_ite.patch @@ -0,0 +1,165 @@ +From: Christoph Hellwig +Date: Tue, 10 Aug 2021 18:33:08 -0700 +Subject: [PATCH] iomap: switch readahead and readpage to use iomap_iter +Git-commit: f6d480006cea3fa1188931fe9751255f13365c4e +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Switch the page cache read functions to use iomap_iter instead of +iomap_apply. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. 
Wong +Acked-by: Hannes Reinecke +--- + fs/iomap/buffered-io.c | 80 +++++++++++++++++++----------------------- + 1 file changed, 37 insertions(+), 43 deletions(-) + +diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c +index 8418dffe8acf..32d80350bb55 100644 +--- a/fs/iomap/buffered-io.c ++++ b/fs/iomap/buffered-io.c +@@ -241,11 +241,12 @@ static inline bool iomap_block_needs_zeroing(struct inode *inode, + pos >= i_size_read(inode); + } + +-static loff_t +-iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, +- struct iomap *iomap, struct iomap *srcmap) ++static loff_t iomap_readpage_iter(struct iomap_iter *iter, ++ struct iomap_readpage_ctx *ctx, loff_t offset) + { +- struct iomap_readpage_ctx *ctx = data; ++ struct iomap *iomap = &iter->iomap; ++ loff_t pos = iter->pos + offset; ++ loff_t length = iomap_length(iter) - offset; + struct page *page = ctx->cur_page; + struct iomap_page *iop; + loff_t orig_pos = pos; +@@ -253,15 +254,16 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, + sector_t sector; + + if (iomap->type == IOMAP_INLINE) +- return min(iomap_read_inline_data(inode, page, iomap), length); ++ return min(iomap_read_inline_data(iter->inode, page, iomap), ++ length); + + /* zero post-eof blocks as the page may be mapped */ +- iop = iomap_page_create(inode, page); +- iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen); ++ iop = iomap_page_create(iter->inode, page); ++ iomap_adjust_read_range(iter->inode, iop, &pos, length, &poff, &plen); + if (plen == 0) + goto done; + +- if (iomap_block_needs_zeroing(inode, iomap, pos)) { ++ if (iomap_block_needs_zeroing(iter->inode, iomap, pos)) { + zero_user(page, poff, plen); + iomap_set_range_uptodate(page, poff, plen); + goto done; +@@ -313,23 +315,23 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, + int + iomap_readpage(struct page *page, const struct iomap_ops *ops) + { +- struct iomap_readpage_ctx ctx = { .cur_page = page }; +- struct inode *inode = page->mapping->host; +- unsigned poff; +- loff_t ret; ++ struct iomap_iter iter = { ++ .inode = page->mapping->host, ++ .pos = page_offset(page), ++ .len = PAGE_SIZE, ++ }; ++ struct iomap_readpage_ctx ctx = { ++ .cur_page = page, ++ }; ++ int ret; + + trace_iomap_readpage(page->mapping->host, 1); + +- for (poff = 0; poff < PAGE_SIZE; poff += ret) { +- ret = iomap_apply(inode, page_offset(page) + poff, +- PAGE_SIZE - poff, 0, ops, &ctx, +- iomap_readpage_actor); +- if (ret <= 0) { +- WARN_ON_ONCE(ret == 0); +- SetPageError(page); +- break; +- } +- } ++ while ((ret = iomap_iter(&iter, ops)) > 0) ++ iter.processed = iomap_readpage_iter(&iter, &ctx, 0); ++ ++ if (ret < 0) ++ SetPageError(page); + + if (ctx.bio) { + submit_bio(ctx.bio); +@@ -348,15 +350,14 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops) + } + EXPORT_SYMBOL_GPL(iomap_readpage); + +-static loff_t +-iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length, +- void *data, struct iomap *iomap, struct iomap *srcmap) ++static loff_t iomap_readahead_iter(struct iomap_iter *iter, ++ struct iomap_readpage_ctx *ctx) + { +- struct iomap_readpage_ctx *ctx = data; ++ loff_t length = iomap_length(iter); + loff_t done, ret; + + for (done = 0; done < length; done += ret) { +- if (ctx->cur_page && offset_in_page(pos + done) == 0) { ++ if (ctx->cur_page && offset_in_page(iter->pos + done) == 0) { + if (!ctx->cur_page_in_bio) + unlock_page(ctx->cur_page); + put_page(ctx->cur_page); +@@ -366,8 +367,7 @@ 
iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length, + ctx->cur_page = readahead_page(ctx->rac); + ctx->cur_page_in_bio = false; + } +- ret = iomap_readpage_actor(inode, pos + done, length - done, +- ctx, iomap, srcmap); ++ ret = iomap_readpage_iter(iter, ctx, done); + } + + return done; +@@ -390,25 +390,19 @@ iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length, + */ + void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) + { +- struct inode *inode = rac->mapping->host; +- loff_t pos = readahead_pos(rac); +- size_t length = readahead_length(rac); ++ struct iomap_iter iter = { ++ .inode = rac->mapping->host, ++ .pos = readahead_pos(rac), ++ .len = readahead_length(rac), ++ }; + struct iomap_readpage_ctx ctx = { + .rac = rac, + }; + +- trace_iomap_readahead(inode, readahead_count(rac)); ++ trace_iomap_readahead(rac->mapping->host, readahead_count(rac)); + +- while (length > 0) { +- ssize_t ret = iomap_apply(inode, pos, length, 0, ops, +- &ctx, iomap_readahead_actor); +- if (ret <= 0) { +- WARN_ON_ONCE(ret == 0); +- break; +- } +- pos += ret; +- length -= ret; +- } ++ while (iomap_iter(&iter, ops) > 0) ++ iter.processed = iomap_readahead_iter(&iter, &ctx); + + if (ctx.bio) + submit_bio(ctx.bio); +-- +2.35.3 + diff --git a/patches.suse/jfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch b/patches.suse/jfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch new file mode 100644 index 0000000..615ae8b --- /dev/null +++ b/patches.suse/jfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch @@ -0,0 +1,49 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:17 +0200 +Subject: [PATCH] jfs: use bdev_nr_bytes instead of open coding it +Git-commit: 74e157e6a499ef47edc39cff8c37f77d01c0d155 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. 
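+
+For reference, a sketch of the helper being used here (it is added
+earlier in this series, not by this patch):
+
+	static inline loff_t bdev_nr_bytes(struct block_device *bdev)
+	{
+		return i_size_read(bdev->bd_inode);
+	}
+
+so the conversion below is purely mechanical.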
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Acked-by: Dave Kleikamp +Link: https://lore.kernel.org/r/20211018101130.1838532-18-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/jfs/resize.c | 2 +- + fs/jfs/super.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c +index bde787c354fc..a42dbb0d3d28 100644 +--- a/fs/jfs/resize.c ++++ b/fs/jfs/resize.c +@@ -199,7 +199,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) + txQuiesce(sb); + + /* Reset size of direct inode */ +- sbi->direct_inode->i_size = i_size_read(sb->s_bdev->bd_inode); ++ sbi->direct_inode->i_size = bdev_nr_bytes(sb->s_bdev); + + if (sbi->mntflag & JFS_INLINELOG) { + /* +diff --git a/fs/jfs/super.c b/fs/jfs/super.c +index 9030aeaf0f88..9241caa16116 100644 +--- a/fs/jfs/super.c ++++ b/fs/jfs/super.c +@@ -551,7 +551,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) + ret = -ENOMEM; + goto out_unload; + } +- inode->i_size = i_size_read(sb->s_bdev->bd_inode); ++ inode->i_size = bdev_nr_bytes(sb->s_bdev); + inode->i_mapping->a_ops = &jfs_metapage_aops; + inode_fake_hash(inode); + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); +-- +2.35.3 + diff --git a/patches.suse/jfs-use-sb_bdev_nr_blocks.patch b/patches.suse/jfs-use-sb_bdev_nr_blocks.patch new file mode 100644 index 0000000..3a78fd7 --- /dev/null +++ b/patches.suse/jfs-use-sb_bdev_nr_blocks.patch @@ -0,0 +1,51 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:27 +0200 +Subject: [PATCH] jfs: use sb_bdev_nr_blocks +Git-commit: dd0c0bdf97a44c2e2b5541e9febde0643a9d0dbf +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the sb_bdev_nr_blocks helper instead of open coding it. 
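+
+For reference, a sketch of the helper (added earlier in this series,
+not by this patch):
+
+	static inline u64 sb_bdev_nr_blocks(struct super_block *sb)
+	{
+		return bdev_nr_bytes(sb->s_bdev) >> sb->s_blocksize_bits;
+	}
+
+which matches the open-coded expression being replaced.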
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Acked-by: Dave Kleikamp +Link: https://lore.kernel.org/r/20211018101130.1838532-28-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/jfs/resize.c | 3 +-- + fs/jfs/super.c | 3 +-- + 2 files changed, 2 insertions(+), 4 deletions(-) + +diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c +index a42dbb0d3d28..8b9a72ae5efa 100644 +--- a/fs/jfs/resize.c ++++ b/fs/jfs/resize.c +@@ -86,8 +86,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) + goto out; + } + +- VolumeSize = i_size_read(sb->s_bdev->bd_inode) >> sb->s_blocksize_bits; +- ++ VolumeSize = sb_bdev_nr_blocks(sb); + if (VolumeSize) { + if (newLVSize > VolumeSize) { + printk(KERN_WARNING "jfs_extendfs: invalid size\n"); +diff --git a/fs/jfs/super.c b/fs/jfs/super.c +index 9241caa16116..24cbc9946e01 100644 +--- a/fs/jfs/super.c ++++ b/fs/jfs/super.c +@@ -284,8 +284,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, + } + case Opt_resize_nosize: + { +- *newLVSize = i_size_read(sb->s_bdev->bd_inode) >> +- sb->s_blocksize_bits; ++ *newLVSize = sb_bdev_nr_blocks(sb); + if (*newLVSize == 0) + pr_err("JFS: Cannot determine volume size\n"); + break; +-- +2.35.3 + diff --git a/patches.suse/kernel-add-product-identifying-information-to-kernel-build.patch b/patches.suse/kernel-add-product-identifying-information-to-kernel-build.patch index 2dfa1b4..b351127 100644 --- a/patches.suse/kernel-add-product-identifying-information-to-kernel-build.patch +++ b/patches.suse/kernel-add-product-identifying-information-to-kernel-build.patch @@ -60,8 +60,8 @@ Signed-off-by: Jeff Mahoney + $(call filechk,suse_version) + ifeq ($(KBUILD_EXTMOD),) - core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ - + core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ + core-$(CONFIG_BLOCK) += block/ @@ -1210,7 +1219,7 @@ PHONY += prepare archprepare archprepare: outputmakefile archheaders archscripts scripts include/config/kernel.release \ diff --git a/patches.suse/libata-libahci-declare-ahci_shost_attr_group-as-stat.patch b/patches.suse/libata-libahci-declare-ahci_shost_attr_group-as-stat.patch new file mode 100644 index 0000000..893f9ba --- /dev/null +++ b/patches.suse/libata-libahci-declare-ahci_shost_attr_group-as-stat.patch @@ -0,0 +1,37 @@ +From 1b87bda1f29a91720a410ac0819866a3cf0df32d Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Thu, 11 Nov 2021 12:03:27 +0900 +Subject: [PATCH] libata: libahci: declare ahci_shost_attr_group as static +Git-commit: 1b87bda1f29a91720a410ac0819866a3cf0df32d +Patch-mainline: v5.16-rc1 +References: git-fixes + +ahci_shost_attr_group is referenced only in drivers/ata/libahci.c. +Declare it as static. 
+ +Fixes: c3f69c7f629f ("scsi: ata: Switch to attribute groups") +Cc: Bart Van Assche +Signed-off-by: Damien Le Moal +Reviewed-by: Christoph Hellwig +Acked-by: Takashi Iwai + +--- + drivers/ata/libahci.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c +index 28430c093a7f..8a6835bfd18a 100644 +--- a/drivers/ata/libahci.c ++++ b/drivers/ata/libahci.c +@@ -131,7 +131,7 @@ const struct attribute_group *ahci_shost_groups[] = { + }; + EXPORT_SYMBOL_GPL(ahci_shost_groups); + +-struct attribute *ahci_sdev_attrs[] = { ++static struct attribute *ahci_sdev_attrs[] = { + &dev_attr_sw_activity.attr, + &dev_attr_unload_heads.attr, + &dev_attr_ncq_prio_supported.attr, +-- +2.35.3 + diff --git a/patches.suse/libata-support-concurrent-positioning-ranges-log.patch b/patches.suse/libata-support-concurrent-positioning-ranges-log.patch new file mode 100644 index 0000000..2436a47 --- /dev/null +++ b/patches.suse/libata-support-concurrent-positioning-ranges-log.patch @@ -0,0 +1,251 @@ +From: Damien Le Moal +Date: Wed, 27 Oct 2021 11:22:21 +0900 +Subject: [PATCH] libata: support concurrent positioning ranges log +Git-commit: fe22e1c2f705676a705d821301fc52eecc2fe055 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Add support to discover if an ATA device supports the Concurrent +Positioning Ranges data log (address 0x47), indicating that the device +is capable of seeking to multiple different locations in parallel using +multiple actuators serving different LBA ranges. + +Also add support to translate the concurrent positioning ranges log +into its equivalent Concurrent Positioning Ranges VPD page B9h in +libata-scsi.c. + +The format of the Concurrent Positioning Ranges Log is defined in ACS-5 +r9. + +Signed-off-by: Damien Le Moal +Reviewed-by: Hannes Reinecke +Reviewed-by: Christoph Hellwig +Reviewed-by: Martin K. Petersen +Reviewed-by: Keith Busch +Link: https://lore.kernel.org/r/20211027022223.183838-4-damien.lemoal@wdc.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/ata/libata-core.c | 57 +++++++++++++++++++++++++++++++++++++-- + drivers/ata/libata-scsi.c | 48 ++++++++++++++++++++++++++------- + include/linux/ata.h | 1 + + include/linux/libata.h | 15 +++++++++++ + 4 files changed, 110 insertions(+), 11 deletions(-) + +diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c +index eed65311b5d1..75f1a6cd6621 100644 +--- a/drivers/ata/libata-core.c ++++ b/drivers/ata/libata-core.c +@@ -2459,18 +2459,70 @@ static void ata_dev_config_devslp(struct ata_device *dev) + } + } + ++static void ata_dev_config_cpr(struct ata_device *dev) ++{ ++ unsigned int err_mask; ++ size_t buf_len; ++ int i, nr_cpr = 0; ++ struct ata_cpr_log *cpr_log = NULL; ++ u8 *desc, *buf = NULL; ++ ++ if (!ata_identify_page_supported(dev, ++ ATA_LOG_CONCURRENT_POSITIONING_RANGES)) ++ goto out; ++ ++ /* ++ * Read IDENTIFY DEVICE data log, page 0x47 ++ * (concurrent positioning ranges). We can have at most 255 32B range ++ * descriptors plus a 64B header. 
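++ * The size is rounded up to a multiple of 512 bytes because
++ * ata_read_log_page() transfers whole 512B log sectors.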
++ */ ++ buf_len = (64 + 255 * 32 + 511) & ~511; ++ buf = kzalloc(buf_len, GFP_KERNEL); ++ if (!buf) ++ goto out; ++ ++ err_mask = ata_read_log_page(dev, ATA_LOG_IDENTIFY_DEVICE, ++ ATA_LOG_CONCURRENT_POSITIONING_RANGES, ++ buf, buf_len >> 9); ++ if (err_mask) ++ goto out; ++ ++ nr_cpr = buf[0]; ++ if (!nr_cpr) ++ goto out; ++ ++ cpr_log = kzalloc(struct_size(cpr_log, cpr, nr_cpr), GFP_KERNEL); ++ if (!cpr_log) ++ goto out; ++ ++ cpr_log->nr_cpr = nr_cpr; ++ desc = &buf[64]; ++ for (i = 0; i < nr_cpr; i++, desc += 32) { ++ cpr_log->cpr[i].num = desc[0]; ++ cpr_log->cpr[i].num_storage_elements = desc[1]; ++ cpr_log->cpr[i].start_lba = get_unaligned_le64(&desc[8]); ++ cpr_log->cpr[i].num_lbas = get_unaligned_le64(&desc[16]); ++ } ++ ++out: ++ swap(dev->cpr_log, cpr_log); ++ kfree(cpr_log); ++ kfree(buf); ++} ++ + static void ata_dev_print_features(struct ata_device *dev) + { + if (!(dev->flags & ATA_DFLAG_FEATURES_MASK)) + return; + + ata_dev_info(dev, +- "Features:%s%s%s%s%s\n", ++ "Features:%s%s%s%s%s%s\n", + dev->flags & ATA_DFLAG_TRUSTED ? " Trust" : "", + dev->flags & ATA_DFLAG_DA ? " Dev-Attention" : "", + dev->flags & ATA_DFLAG_DEVSLP ? " Dev-Sleep" : "", + dev->flags & ATA_DFLAG_NCQ_SEND_RECV ? " NCQ-sndrcv" : "", +- dev->flags & ATA_DFLAG_NCQ_PRIO ? " NCQ-prio" : ""); ++ dev->flags & ATA_DFLAG_NCQ_PRIO ? " NCQ-prio" : "", ++ dev->cpr_log ? " CPR" : ""); + } + + /** +@@ -2634,6 +2686,7 @@ int ata_dev_configure(struct ata_device *dev) + ata_dev_config_sense_reporting(dev); + ata_dev_config_zac(dev); + ata_dev_config_trusted(dev); ++ ata_dev_config_cpr(dev); + dev->cdb_len = 32; + + if (ata_msg_drv(ap) && print_info) +diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c +index 1fb4611f7eeb..15a279f773c7 100644 +--- a/drivers/ata/libata-scsi.c ++++ b/drivers/ata/libata-scsi.c +@@ -1895,7 +1895,7 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf) + */ + static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf) + { +- int num_pages; ++ int i, num_pages = 0; + static const u8 pages[] = { + 0x00, /* page 0x00, this page */ + 0x80, /* page 0x80, unit serial no page */ +@@ -1905,13 +1905,17 @@ static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf) + 0xb1, /* page 0xb1, block device characteristics page */ + 0xb2, /* page 0xb2, thin provisioning page */ + 0xb6, /* page 0xb6, zoned block device characteristics */ ++ 0xb9, /* page 0xb9, concurrent positioning ranges */ + }; + +- num_pages = sizeof(pages); +- if (!(args->dev->flags & ATA_DFLAG_ZAC)) +- num_pages--; ++ for (i = 0; i < sizeof(pages); i++) { ++ if (pages[i] == 0xb6 && ++ !(args->dev->flags & ATA_DFLAG_ZAC)) ++ continue; ++ rbuf[num_pages + 4] = pages[i]; ++ num_pages++; ++ } + rbuf[3] = num_pages; /* number of supported VPD pages */ +- memcpy(rbuf + 4, pages, num_pages); + return 0; + } + +@@ -2121,6 +2125,26 @@ static unsigned int ata_scsiop_inq_b6(struct ata_scsi_args *args, u8 *rbuf) + return 0; + } + ++static unsigned int ata_scsiop_inq_b9(struct ata_scsi_args *args, u8 *rbuf) ++{ ++ struct ata_cpr_log *cpr_log = args->dev->cpr_log; ++ u8 *desc = &rbuf[64]; ++ int i; ++ ++ /* SCSI Concurrent Positioning Ranges VPD page: SBC-5 rev 1 or later */ ++ rbuf[1] = 0xb9; ++ put_unaligned_be16(64 + (int)cpr_log->nr_cpr * 32 - 4, &rbuf[3]); ++ ++ for (i = 0; i < cpr_log->nr_cpr; i++, desc += 32) { ++ desc[0] = cpr_log->cpr[i].num; ++ desc[1] = cpr_log->cpr[i].num_storage_elements; ++ put_unaligned_be64(cpr_log->cpr[i].start_lba, &desc[8]); ++ 
put_unaligned_be64(cpr_log->cpr[i].num_lbas, &desc[16]); ++ } ++ ++ return 0; ++} ++ + /** + * modecpy - Prepare response for MODE SENSE + * @dest: output buffer +@@ -4120,11 +4144,17 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd) + ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b2); + break; + case 0xb6: +- if (dev->flags & ATA_DFLAG_ZAC) { ++ if (dev->flags & ATA_DFLAG_ZAC) + ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b6); +- break; +- } +- fallthrough; ++ else ++ ata_scsi_set_invalid_field(dev, cmd, 2, 0xff); ++ break; ++ case 0xb9: ++ if (dev->cpr_log) ++ ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b9); ++ else ++ ata_scsi_set_invalid_field(dev, cmd, 2, 0xff); ++ break; + default: + ata_scsi_set_invalid_field(dev, cmd, 2, 0xff); + break; +diff --git a/include/linux/ata.h b/include/linux/ata.h +index 1b44f40c7700..199e47e97d64 100644 +--- a/include/linux/ata.h ++++ b/include/linux/ata.h +@@ -329,6 +329,7 @@ enum { + ATA_LOG_SECURITY = 0x06, + ATA_LOG_SATA_SETTINGS = 0x08, + ATA_LOG_ZONED_INFORMATION = 0x09, ++ ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47, + + /* Identify device SATA settings log:*/ + ATA_LOG_DEVSLP_OFFSET = 0x30, +diff --git a/include/linux/libata.h b/include/linux/libata.h +index c0c64f03e107..236ec689056a 100644 +--- a/include/linux/libata.h ++++ b/include/linux/libata.h +@@ -676,6 +676,18 @@ struct ata_ering { + struct ata_ering_entry ring[ATA_ERING_SIZE]; + }; + ++struct ata_cpr { ++ u8 num; ++ u8 num_storage_elements; ++ u64 start_lba; ++ u64 num_lbas; ++}; ++ ++struct ata_cpr_log { ++ u8 nr_cpr; ++ struct ata_cpr cpr[]; ++}; ++ + struct ata_device { + struct ata_link *link; + unsigned int devno; /* 0 or 1 */ +@@ -735,6 +747,9 @@ struct ata_device { + u32 zac_zones_optimal_nonseq; + u32 zac_zones_max_open; + ++ /* Concurrent positioning ranges */ ++ struct ata_cpr_log *cpr_log; ++ + /* error history */ + int spdn_cnt; + /* ering is CLEAR_END, read comment above CLEAR_END */ +-- +2.35.3 + diff --git a/patches.suse/libbpf-1.0-Deprecate-bpf_map__is_offload_neutral.patch b/patches.suse/libbpf-1.0-Deprecate-bpf_map__is_offload_neutral.patch new file mode 100644 index 0000000..2f0aab4 --- /dev/null +++ b/patches.suse/libbpf-1.0-Deprecate-bpf_map__is_offload_neutral.patch @@ -0,0 +1,47 @@ +From: Christy Lee +Date: Tue, 4 Jan 2022 16:06:01 -0800 +Subject: libbpf 1.0: Deprecate bpf_map__is_offload_neutral() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 +Git-commit: 9855c131b9c8b0327ff5182f88bb1991f212415b +References: jsc#PED-1368 + +Deprecate bpf_map__is_offload_neutral(). It’s most probably broken +already. PERF_EVENT_ARRAY isn’t the only map that’s not suitable +for hardware offloading. Applications can directly check map type +instead. 
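+
+For illustration, a caller that previously used the helper can query the
+map type itself; a minimal sketch, mirroring the bpftool change below
+(obj and ifindex are assumed to be set up elsewhere):
+
+	struct bpf_map *map;
+
+	bpf_object__for_each_map(map, obj) {
+		/* perf event arrays stay offload-neutral */
+		if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
+			bpf_map__set_ifindex(map, ifindex);
+	}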
+
+ [0] Closes: https://github.com/libbpf/libbpf/issues/306
+
+Signed-off-by: Christy Lee
+Signed-off-by: Andrii Nakryiko
+Link: https://lore.kernel.org/bpf/20220105000601.2090044-1-christylee@fb.com
+Acked-by: Shung-Hsi Yu
+---
+ tools/bpf/bpftool/prog.c | 2 +-
+ tools/lib/bpf/libbpf.h | 1 +
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/tools/bpf/bpftool/prog.c
++++ b/tools/bpf/bpftool/prog.c
+@@ -1655,7 +1655,7 @@ static int load_with_options(int argc, c
+ 	j = 0;
+ 	idx = 0;
+ 	bpf_object__for_each_map(map, obj) {
+-		if (!bpf_map__is_offload_neutral(map))
++		if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
+ 			bpf_map__set_ifindex(map, ifindex);
+
+ 		if (j < old_map_fds && idx == map_replace[j].idx) {
+--- a/tools/lib/bpf/libbpf.h
++++ b/tools/lib/bpf/libbpf.h
+@@ -734,6 +734,7 @@ LIBBPF_API void *bpf_map__priv(const str
+ LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
+ 					  const void *data, size_t size);
+ LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
++LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__type() instead")
+ LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
+ LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
+ LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
diff --git a/patches.suse/libbpf-1.0-Deprecate-bpf_object__find_map_by_offset-.patch b/patches.suse/libbpf-1.0-Deprecate-bpf_object__find_map_by_offset-.patch
new file mode 100644
index 0000000..952c7fc
--- /dev/null
+++ b/patches.suse/libbpf-1.0-Deprecate-bpf_object__find_map_by_offset-.patch
@@ -0,0 +1,36 @@
+From: Christy Lee
+Date: Tue, 4 Jan 2022 16:31:20 -0800
+Subject: libbpf 1.0: Deprecate bpf_object__find_map_by_offset() API
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+Patch-mainline: v5.17-rc1
+Git-commit: 5f6082642814050352a3e29f8713796b55ebf788
+References: jsc#PED-1368
+
+This API was created with simplistic assumptions about BPF map
+definitions. It hasn’t worked for a while, so deprecate it in
+preparation for libbpf 1.0.
+
+ [0] Closes: https://github.com/libbpf/libbpf/issues/302
+
+Signed-off-by: Christy Lee
+Signed-off-by: Andrii Nakryiko
+Link: https://lore.kernel.org/bpf/20220105003120.2222673-1-christylee@fb.com
+Acked-by: Shung-Hsi Yu
+---
+ tools/lib/bpf/libbpf.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/tools/lib/bpf/libbpf.h
++++ b/tools/lib/bpf/libbpf.h
+@@ -672,7 +672,8 @@ bpf_object__find_map_fd_by_name(const st
+ * Get bpf_map through the offset of corresponding struct bpf_map_def
+ * in the BPF object file.
+ */
+-LIBBPF_API struct bpf_map *
++LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__find_map_by_name() instead")
++struct bpf_map *
+ bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
+
+ LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead")
diff --git a/patches.suse/libbpf-Accommodate-DWARF-compiler-bug-with-duplicate.patch b/patches.suse/libbpf-Accommodate-DWARF-compiler-bug-with-duplicate.patch
new file mode 100644
index 0000000..72c85f4
--- /dev/null
+++ b/patches.suse/libbpf-Accommodate-DWARF-compiler-bug-with-duplicate.patch
@@ -0,0 +1,108 @@
+From: Andrii Nakryiko
+Date: Wed, 17 Nov 2021 11:41:13 -0800
+Subject: libbpf: Accommodate DWARF/compiler bug with duplicated structs
+Patch-mainline: v5.17-rc1
+Git-commit: efdd3eb8015e7447095f02a26eaabd164cd18004
+References: jsc#PED-1368
+
+According to [0], compilers sometimes might produce duplicate DWARF
+definitions for exactly the same struct/union within the same
+compilation unit (CU). We've had similar issues with identical arrays
+and handled them with a similar workaround in 6b6e6b1d09aa ("libbpf:
+Accomodate DWARF/compiler bug with duplicated identical arrays"). Do the
+same for struct/union by ensuring that two structs/unions are exactly
+the same, down to the integer values of field referenced type IDs.
+
+Solving this more generically (allowing referenced types to be
+equivalent, but using different type IDs, all within a single CU)
+requires a huge complexity increase to handle many-to-many mappings
+between canonical and candidate type graphs. Before we invest in that,
+let's see if this approach handles all the instances of this issue in
+practice. Thankfully it's pretty rare, it seems.
+
+ [0] https://lore.kernel.org/bpf/YXr2NFlJTAhHdZqq@krava/
+
+Reported-by: Jiri Olsa
+Signed-off-by: Andrii Nakryiko
+Signed-off-by: Daniel Borkmann
+Link: https://lore.kernel.org/bpf/20211117194114.347675-1-andrii@kernel.org
+Acked-by: Shung-Hsi Yu
+---
+ tools/lib/bpf/btf.c | 45 +++++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 41 insertions(+), 4 deletions(-)
+
+--- a/tools/lib/bpf/btf.c
++++ b/tools/lib/bpf/btf.c
+@@ -3477,8 +3477,8 @@ static long btf_hash_struct(struct btf_t
+ }
+
+ /*
+- * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
+- * IDs. This check is performed during type graph equivalence check and
++ * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced
++ * type IDs. This check is performed during type graph equivalence check and
+ * referenced types equivalence is checked separately.
+ */ + static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2) +@@ -3851,6 +3851,31 @@ static int btf_dedup_identical_arrays(st + return btf_equal_array(t1, t2); + } + ++/* Check if given two types are identical STRUCT/UNION definitions */ ++static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2) ++{ ++ const struct btf_member *m1, *m2; ++ struct btf_type *t1, *t2; ++ int n, i; ++ ++ t1 = btf_type_by_id(d->btf, id1); ++ t2 = btf_type_by_id(d->btf, id2); ++ ++ if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2)) ++ return false; ++ ++ if (!btf_shallow_equal_struct(t1, t2)) ++ return false; ++ ++ m1 = btf_members(t1); ++ m2 = btf_members(t2); ++ for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { ++ if (m1->type != m2->type) ++ return false; ++ } ++ return true; ++} ++ + /* + * Check equivalence of BTF type graph formed by candidate struct/union (we'll + * call it "candidate graph" in this description for brevity) to a type graph +@@ -3962,6 +3987,8 @@ static int btf_dedup_is_equiv(struct btf + + hypot_type_id = d->hypot_map[canon_id]; + if (hypot_type_id <= BTF_MAX_NR_TYPES) { ++ if (hypot_type_id == cand_id) ++ return 1; + /* In some cases compiler will generate different DWARF types + * for *identical* array type definitions and use them for + * different fields within the *same* struct. This breaks type +@@ -3970,8 +3997,18 @@ static int btf_dedup_is_equiv(struct btf + * types within a single CU. So work around that by explicitly + * allowing identical array types here. + */ +- return hypot_type_id == cand_id || +- btf_dedup_identical_arrays(d, hypot_type_id, cand_id); ++ if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id)) ++ return 1; ++ /* It turns out that similar situation can happen with ++ * struct/union sometimes, sigh... Handle the case where ++ * structs/unions are exactly the same, down to the referenced ++ * type IDs. Anything more complicated (e.g., if referenced ++ * types are different, but equivalent) is *way more* ++ * complicated and requires a many-to-many equivalence mapping. ++ */ ++ if (btf_dedup_identical_structs(d, hypot_type_id, cand_id)) ++ return 1; ++ return 0; + } + + if (btf_dedup_hypot_map_add(d, canon_id, cand_id)) diff --git a/patches.suse/libbpf-Add-API-to-get-set-log_level-at-per-program-l.patch b/patches.suse/libbpf-Add-API-to-get-set-log_level-at-per-program-l.patch new file mode 100644 index 0000000..eb05098 --- /dev/null +++ b/patches.suse/libbpf-Add-API-to-get-set-log_level-at-per-program-l.patch @@ -0,0 +1,80 @@ +From: Andrii Nakryiko +Date: Wed, 1 Dec 2021 15:28:17 -0800 +Subject: libbpf: Add API to get/set log_level at per-program level +Patch-mainline: v5.17-rc1 +Git-commit: dbdd2c7f8cec2d09ae0e1bd707ae6050fa1c105f +References: jsc#PED-1368 + +Add bpf_program__set_log_level() and bpf_program__log_level() to fetch +and adjust log_level sent during BPF_PROG_LOAD command. This allows to +selectively request more or less verbose output in BPF verifier log. + +Also bump libbpf version to 0.7 and make these APIs the first in v0.7. 
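+
+For instance, verbose verifier output can be requested for a single
+program before loading the object; a rough sketch (the object file and
+program name are hypothetical, error handling omitted):
+
+	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
+	struct bpf_program *prog;
+
+	prog = bpf_object__find_program_by_name(obj, "noisy_prog");
+	bpf_program__set_log_level(prog, 2); /* 2 requests the full verifier log */
+	bpf_object__load(obj);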
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211201232824.3166325-3-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 14 ++++++++++++++ + tools/lib/bpf/libbpf.h | 2 ++ + tools/lib/bpf/libbpf.map | 6 ++++++ + tools/lib/bpf/libbpf_version.h | 2 +- + 4 files changed, 23 insertions(+), 1 deletion(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -8473,6 +8473,20 @@ int bpf_program__set_flags(struct bpf_pr + return 0; + } + ++__u32 bpf_program__log_level(const struct bpf_program *prog) ++{ ++ return prog->log_level; ++} ++ ++int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) ++{ ++ if (prog->obj->loaded) ++ return libbpf_err(-EBUSY); ++ ++ prog->log_level = log_level; ++ return 0; ++} ++ + #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \ + .sec = sec_pfx, \ + .prog_type = BPF_PROG_TYPE_##ptype, \ +--- a/tools/lib/bpf/libbpf.h ++++ b/tools/lib/bpf/libbpf.h +@@ -498,6 +498,8 @@ bpf_program__set_expected_attach_type(st + + LIBBPF_API __u32 bpf_program__flags(const struct bpf_program *prog); + LIBBPF_API int bpf_program__set_flags(struct bpf_program *prog, __u32 flags); ++LIBBPF_API __u32 bpf_program__log_level(const struct bpf_program *prog); ++LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level); + + LIBBPF_API int + bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, +--- a/tools/lib/bpf/libbpf.map ++++ b/tools/lib/bpf/libbpf.map +@@ -419,3 +419,9 @@ LIBBPF_0.6.0 { + perf_buffer__new_raw; + perf_buffer__new_raw_deprecated; + } LIBBPF_0.5.0; ++ ++LIBBPF_0.7.0 { ++ global: ++ bpf_program__log_level; ++ bpf_program__set_log_level; ++}; +--- a/tools/lib/bpf/libbpf_version.h ++++ b/tools/lib/bpf/libbpf_version.h +@@ -4,6 +4,6 @@ + #define __LIBBPF_VERSION_H + + #define LIBBPF_MAJOR_VERSION 0 +-#define LIBBPF_MINOR_VERSION 6 ++#define LIBBPF_MINOR_VERSION 7 + + #endif /* __LIBBPF_VERSION_H */ diff --git a/patches.suse/libbpf-Add-OPTS-based-bpf_btf_load-API.patch b/patches.suse/libbpf-Add-OPTS-based-bpf_btf_load-API.patch new file mode 100644 index 0000000..47a084c --- /dev/null +++ b/patches.suse/libbpf-Add-OPTS-based-bpf_btf_load-API.patch @@ -0,0 +1,168 @@ +From: Andrii Nakryiko +Date: Thu, 9 Dec 2021 11:38:30 -0800 +Subject: libbpf: Add OPTS-based bpf_btf_load() API +Patch-mainline: v5.17-rc1 +Git-commit: 0ed08d6725b5116aaad7a0082d721286e0a43dca +References: jsc#PED-1368 + +Similar to previous bpf_prog_load() and bpf_map_create() APIs, add +bpf_btf_load() API which is taking optional OPTS struct. Schedule +bpf_load_btf() for deprecation in v0.8 ([0]). + +This makes naming consistent with BPF_BTF_LOAD command, sets up an API +for extensibility in the future, moves options parameters (log-related +fields) into optional options, and also allows to pass log_level +directly. + +It also removes log buffer auto-allocation logic from low-level API +(consistent with bpf_prog_load() behavior), but preserves a special +treatment of log_level == 0 with non-NULL log_buf, which matches +low-level bpf_prog_load() and high-level libbpf APIs for BTF and program +loading behaviors. 
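+
+A rough usage sketch of the new API (the buffer size and the
+raw_data/raw_size inputs are placeholders):
+
+	char log[64 * 1024];
+	LIBBPF_OPTS(bpf_btf_load_opts, opts,
+		.log_buf = log,
+		.log_size = sizeof(log),
+		.log_level = 0,	/* fill log only if the load fails */
+	);
+	int btf_fd = bpf_btf_load(raw_data, raw_size, &opts);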
+ + [0] Closes: https://github.com/libbpf/libbpf/issues/419 + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211209193840.1248570-3-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf.c | 59 +++++++++++++++++++++++++++++++++++------- + tools/lib/bpf/bpf.h | 19 ++++++++++++- + tools/lib/bpf/libbpf.map | 1 + tools/lib/bpf/libbpf_probes.c | 2 - + 4 files changed, 69 insertions(+), 12 deletions(-) + +--- a/tools/lib/bpf/bpf.c ++++ b/tools/lib/bpf/bpf.c +@@ -1047,24 +1047,65 @@ int bpf_raw_tracepoint_open(const char * + return libbpf_err_errno(fd); + } + +-int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, +- bool do_log) ++int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_load_opts *opts) + { +- union bpf_attr attr = {}; ++ const size_t attr_sz = offsetofend(union bpf_attr, btf_log_level); ++ union bpf_attr attr; ++ char *log_buf; ++ size_t log_size; ++ __u32 log_level; + int fd; + +- attr.btf = ptr_to_u64(btf); ++ memset(&attr, 0, attr_sz); ++ ++ if (!OPTS_VALID(opts, bpf_btf_load_opts)) ++ return libbpf_err(-EINVAL); ++ ++ log_buf = OPTS_GET(opts, log_buf, NULL); ++ log_size = OPTS_GET(opts, log_size, 0); ++ log_level = OPTS_GET(opts, log_level, 0); ++ ++ if (log_size > UINT_MAX) ++ return libbpf_err(-EINVAL); ++ if (log_size && !log_buf) ++ return libbpf_err(-EINVAL); ++ ++ attr.btf = ptr_to_u64(btf_data); + attr.btf_size = btf_size; ++ /* log_level == 0 and log_buf != NULL means "try loading without ++ * log_buf, but retry with log_buf and log_level=1 on error", which is ++ * consistent across low-level and high-level BTF and program loading ++ * APIs within libbpf and provides a sensible behavior in practice ++ */ ++ if (log_level) { ++ attr.btf_log_buf = ptr_to_u64(log_buf); ++ attr.btf_log_size = (__u32)log_size; ++ attr.btf_log_level = log_level; ++ } + +-retry: +- if (do_log && log_buf && log_buf_size) { +- attr.btf_log_level = 1; +- attr.btf_log_size = log_buf_size; ++ fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz); ++ if (fd < 0 && log_buf && log_level == 0) { + attr.btf_log_buf = ptr_to_u64(log_buf); ++ attr.btf_log_size = (__u32)log_size; ++ attr.btf_log_level = 1; ++ fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz); + } ++ return libbpf_err_errno(fd); ++} + +- fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, sizeof(attr)); ++int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log) ++{ ++ LIBBPF_OPTS(bpf_btf_load_opts, opts); ++ int fd; ++ ++retry: ++ if (do_log && log_buf && log_buf_size) { ++ opts.log_buf = log_buf; ++ opts.log_size = log_buf_size; ++ opts.log_level = 1; ++ } + ++ fd = bpf_btf_load(btf, btf_size, &opts); + if (fd < 0 && !do_log && log_buf && log_buf_size) { + do_log = true; + goto retry; +--- a/tools/lib/bpf/bpf.h ++++ b/tools/lib/bpf/bpf.h +@@ -213,6 +213,23 @@ LIBBPF_API int bpf_verify_program(enum b + char *log_buf, size_t log_buf_sz, + int log_level); + ++struct bpf_btf_load_opts { ++ size_t sz; /* size of this struct for forward/backward compatibility */ ++ ++ /* kernel log options */ ++ char *log_buf; ++ __u32 log_level; ++ __u32 log_size; ++}; ++#define bpf_btf_load_opts__last_field log_size ++ ++LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, ++ const struct bpf_btf_load_opts *opts); ++ ++LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_btf_load() instead") ++LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, ++ __u32 log_buf_size, bool do_log); ++ + LIBBPF_API int 
bpf_map_update_elem(int fd, const void *key, const void *value, + __u64 flags); + +@@ -340,8 +357,6 @@ LIBBPF_API int bpf_prog_query(int target + __u32 query_flags, __u32 *attach_flags, + __u32 *prog_ids, __u32 *prog_cnt); + LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); +-LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, +- __u32 log_buf_size, bool do_log); + LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, + __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, + __u64 *probe_offset, __u64 *probe_addr); +--- a/tools/lib/bpf/libbpf.map ++++ b/tools/lib/bpf/libbpf.map +@@ -422,6 +422,7 @@ LIBBPF_0.6.0 { + + LIBBPF_0.7.0 { + global: ++ bpf_btf_load; + bpf_program__log_level; + bpf_program__set_log_level; + }; +--- a/tools/lib/bpf/libbpf_probes.c ++++ b/tools/lib/bpf/libbpf_probes.c +@@ -164,7 +164,7 @@ int libbpf__load_raw_btf(const char *raw + memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); + memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); + +- btf_fd = bpf_load_btf(raw_btf, btf_len, NULL, 0, false); ++ btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); + + free(raw_btf); + return btf_fd; diff --git a/patches.suse/libbpf-Add-ability-to-get-set-per-program-load-flags.patch b/patches.suse/libbpf-Add-ability-to-get-set-per-program-load-flags.patch new file mode 100644 index 0000000..dd12900 --- /dev/null +++ b/patches.suse/libbpf-Add-ability-to-get-set-per-program-load-flags.patch @@ -0,0 +1,72 @@ +From: Andrii Nakryiko +Date: Wed, 10 Nov 2021 21:17:57 -0800 +Subject: libbpf: Add ability to get/set per-program load flags +Patch-mainline: v5.17-rc1 +Git-commit: a6ca71583137300f207343d5d950cb1c365ab911 +References: jsc#PED-1368 + +Add bpf_program__flags() API to retrieve prog_flags that will be (or +were) supplied to BPF_PROG_LOAD command. + +Also add bpf_program__set_extra_flags() API to allow to set *extra* +flags, in addition to those determined by program's SEC() definition. +Such flags are logically OR'ed with libbpf-derived flags. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211111051758.92283-2-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 14 ++++++++++++++ + tools/lib/bpf/libbpf.h | 3 +++ + tools/lib/bpf/libbpf.map | 2 ++ + 3 files changed, 19 insertions(+) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -8260,6 +8260,20 @@ void bpf_program__set_expected_attach_ty + prog->expected_attach_type = type; + } + ++__u32 bpf_program__flags(const struct bpf_program *prog) ++{ ++ return prog->prog_flags; ++} ++ ++int bpf_program__set_extra_flags(struct bpf_program *prog, __u32 extra_flags) ++{ ++ if (prog->obj->loaded) ++ return libbpf_err(-EBUSY); ++ ++ prog->prog_flags |= extra_flags; ++ return 0; ++} ++ + #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) 
{ \
+ 	.sec = sec_pfx, \
+ 	.prog_type = BPF_PROG_TYPE_##ptype, \
+--- a/tools/lib/bpf/libbpf.h
++++ b/tools/lib/bpf/libbpf.h
+@@ -492,6 +492,9 @@ LIBBPF_API void
+ bpf_program__set_expected_attach_type(struct bpf_program *prog,
+ 				      enum bpf_attach_type type);
+
++LIBBPF_API __u32 bpf_program__flags(const struct bpf_program *prog);
++LIBBPF_API int bpf_program__set_extra_flags(struct bpf_program *prog, __u32 extra_flags);
++
+ LIBBPF_API int
+ bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
+ 			       const char *attach_func_name);
+--- a/tools/lib/bpf/libbpf.map
++++ b/tools/lib/bpf/libbpf.map
+@@ -397,8 +397,10 @@ LIBBPF_0.6.0 {
+ 		bpf_object__prev_program;
+ 		bpf_prog_load_deprecated;
+ 		bpf_prog_load;
++		bpf_program__flags;
+ 		bpf_program__insn_cnt;
+ 		bpf_program__insns;
++		bpf_program__set_extra_flags;
+ 		btf__add_btf;
+ 		btf__add_decl_tag;
+ 		btf__raw_data;
diff --git a/patches.suse/libbpf-Add-bool-skipped-to-struct-bpf_map.patch b/patches.suse/libbpf-Add-bool-skipped-to-struct-bpf_map.patch
new file mode 100644
index 0000000..b0440a5
--- /dev/null
+++ b/patches.suse/libbpf-Add-bool-skipped-to-struct-bpf_map.patch
@@ -0,0 +1,69 @@
+From: Shuyi Cheng
+Date: Fri, 10 Dec 2021 17:39:57 +0800
+Subject: libbpf: Add "bool skipped" to struct bpf_map
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+Patch-mainline: v5.17-rc1
+Git-commit: 229fae38d0fc0d6ff58d57cbeb1432da55e58d4f
+References: jsc#PED-1368
+
+Fix error: "failed to pin map: Bad file descriptor, path:
+/sys/fs/bpf/_rodata_str1_1."
+
+On old kernels the global data map will not be created, see [0]. So
+we should skip the pinning of the global data map to avoid
+bpf_object__pin_maps returning an error. Therefore, when the map is not
+created, we mark "map->skipped" as true and then check during relocation
+and during pinning.
+
+Fixes: 16e0c35c6f7a ("libbpf: Load global data maps lazily on legacy kernels")
+Signed-off-by: Shuyi Cheng
+Signed-off-by: Andrii Nakryiko
+Acked-by: Shung-Hsi Yu
+---
+ tools/lib/bpf/libbpf.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/tools/lib/bpf/libbpf.c
++++ b/tools/lib/bpf/libbpf.c
+@@ -431,6 +431,7 @@ struct bpf_map {
+ 	char *pin_path;
+ 	bool pinned;
+ 	bool reused;
++	bool skipped;
+ 	__u64 map_extra;
+ };
+
+@@ -5087,8 +5088,10 @@ bpf_object__create_maps(struct bpf_objec
+ 		 * kernels.
+ 		 */
+ 		if (bpf_map__is_internal(map) &&
+-		    !kernel_supports(obj, FEAT_GLOBAL_DATA))
++		    !kernel_supports(obj, FEAT_GLOBAL_DATA)) {
++			map->skipped = true;
+ 			continue;
++		}
+
+ 		retried = false;
+ retry:
+@@ -5717,8 +5720,7 @@ bpf_object__relocate_data(struct bpf_obj
+ 		} else {
+ 			const struct bpf_map *map = &obj->maps[relo->map_idx];
+
+-			if (bpf_map__is_internal(map) &&
+-			    !kernel_supports(obj, FEAT_GLOBAL_DATA)) {
++			if (map->skipped) {
+ 				pr_warn("prog '%s': relo #%d: kernel doesn't support global data\n",
+ 					prog->name, i);
+ 				return -ENOTSUP;
+@@ -7924,6 +7926,9 @@ int bpf_object__pin_maps(struct bpf_obje
+ 		char *pin_path = NULL;
+ 		char buf[PATH_MAX];
+
++		if (map->skipped)
++			continue;
++
+ 		if (path) {
+ 			int len;
+
diff --git a/patches.suse/libbpf-Add-doc-comments-for-bpf_program__-un-pin.patch b/patches.suse/libbpf-Add-doc-comments-for-bpf_program__-un-pin.patch
new file mode 100644
index 0000000..5085076
--- /dev/null
+++ b/patches.suse/libbpf-Add-doc-comments-for-bpf_program__-un-pin.patch
@@ -0,0 +1,52 @@
+From: Grant Seltzer
+Date: Thu, 9 Dec 2021 18:22:22 -0500
+Subject: libbpf: Add doc comments for bpf_program__(un)pin()
+Patch-mainline: v5.17-rc1
+Git-commit: f742fc68ac0da76d96e5713210b0aef771c1dd0f
+References: jsc#PED-1368
+
+This adds doc comments for the two bpf_program pinning functions,
+bpf_program__pin() and bpf_program__unpin().
+
+Signed-off-by: Grant Seltzer
+Signed-off-by: Andrii Nakryiko
+Link: https://lore.kernel.org/bpf/20211209232222.541733-1-grantseltzer@gmail.com
+Acked-by: Shung-Hsi Yu
+---
+ tools/lib/bpf/libbpf.h | 24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+--- a/tools/lib/bpf/libbpf.h
++++ b/tools/lib/bpf/libbpf.h
+@@ -338,7 +338,31 @@ LIBBPF_DEPRECATED_SINCE(0, 7, "multi-ins
+ LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog,
+ 					   const char *path,
+ 					   int instance);
++
++/**
++ * @brief **bpf_program__pin()** pins the BPF program to a file
++ * in the BPF FS specified by a path. This increments the program's
++ * reference count, allowing it to stay loaded after the process
++ * which loaded it has exited.
++ *
++ * @param prog BPF program to pin, must already be loaded
++ * @param path file path in a BPF file system
++ * @return 0, on success; negative error code, otherwise
++ */
+ LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
++
++/**
++ * @brief **bpf_program__unpin()** unpins the BPF program from a file
++ * in the BPFFS specified by a path. This decrements the program's
++ * reference count.
++ *
++ * The file pinning the BPF program can also be unlinked by a different
++ * process in which case this function will return an error.
++ *
++ * @param prog BPF program to unpin
++ * @param path file path to the pin in a BPF file system
++ * @return 0, on success; negative error code, otherwise
++ */
+ LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path);
+ LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
+
diff --git a/patches.suse/libbpf-Add-doc-comments-in-libbpf.h.patch b/patches.suse/libbpf-Add-doc-comments-in-libbpf.h.patch
new file mode 100644
index 0000000..2461acc
--- /dev/null
+++ b/patches.suse/libbpf-Add-doc-comments-in-libbpf.h.patch
@@ -0,0 +1,101 @@
+From: Grant Seltzer
+Date: Mon, 6 Dec 2021 15:37:09 -0500
+Subject: libbpf: Add doc comments in libbpf.h
+Patch-mainline: v5.17-rc1
+Git-commit: d5284dedccdb9053988278dd30c834d46b8c866d
+References: jsc#PED-1368
+
+This adds comments above functions in libbpf.h which document
+their uses.
These comments are of a format that doxygen and sphinx
+can pick up and render. These are rendered at libbpf.readthedocs.org.
+
+These doc comments are for:
+
+- bpf_object__open_file()
+- bpf_object__open_mem()
+- bpf_program__attach_uprobe()
+- bpf_program__attach_uprobe_opts()
+
+Signed-off-by: Grant Seltzer
+Signed-off-by: Andrii Nakryiko
+Link: https://lore.kernel.org/bpf/20211206203709.332530-1-grantseltzer@gmail.com
+Acked-by: Shung-Hsi Yu
+---
+ tools/lib/bpf/libbpf.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 53 insertions(+)
+
+--- a/tools/lib/bpf/libbpf.h
++++ b/tools/lib/bpf/libbpf.h
+@@ -112,8 +112,30 @@ struct bpf_object_open_opts {
+ #define bpf_object_open_opts__last_field btf_custom_path
+
+ LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
++
++/**
++ * @brief **bpf_object__open_file()** creates a bpf_object by opening
++ * the BPF ELF object file pointed to by the passed path and loading it
++ * into memory.
++ * @param path BPF object file path
++ * @param opts options for how to load the bpf object, this parameter is
++ * optional and can be set to NULL
++ * @return pointer to the new bpf_object; or NULL is returned on error,
++ * error code is stored in errno
++ */
+ LIBBPF_API struct bpf_object *
+ bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts);
++
++/**
++ * @brief **bpf_object__open_mem()** creates a bpf_object by reading
++ * the BPF object's raw bytes from a memory buffer containing a valid
++ * BPF ELF object file.
++ * @param obj_buf pointer to the buffer containing ELF file bytes
++ * @param obj_buf_sz number of bytes in the buffer
++ * @param opts options for how to load the bpf object
++ * @return pointer to the new bpf_object; or NULL is returned on error,
++ * error code is stored in errno
++ */
+ LIBBPF_API struct bpf_object *
+ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
+ 		     const struct bpf_object_open_opts *opts);
+@@ -347,10 +369,41 @@ struct bpf_uprobe_opts {
+ };
+ #define bpf_uprobe_opts__last_field retprobe
+
++/**
++ * @brief **bpf_program__attach_uprobe()** attaches a BPF program
++ * to the userspace function which is found by binary path and
++ * offset. You can optionally specify a particular process to attach
++ * to. You can also optionally attach the program to the function
++ * exit instead of entry.
++ *
++ * @param prog BPF program to attach
++ * @param retprobe Attach to function exit
++ * @param pid Process ID to attach the uprobe to, 0 for self (own process),
++ * -1 for all processes
++ * @param binary_path Path to binary that contains the function symbol
++ * @param func_offset Offset within the binary of the function symbol
++ * @return Reference to the newly created BPF link; or NULL is returned on error,
++ * error code is stored in errno
++ */
+ LIBBPF_API struct bpf_link *
+ bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe,
+ 			   pid_t pid, const char *binary_path,
+ 			   size_t func_offset);
++
++/**
++ * @brief **bpf_program__attach_uprobe_opts()** is just like
++ * bpf_program__attach_uprobe() except with an options struct
++ * for various configurations.
++ * ++ * @param prog BPF program to attach ++ * @param pid Process ID to attach the uprobe to, 0 for self (own process), ++ * -1 for all processes ++ * @param binary_path Path to binary that contains the function symbol ++ * @param func_offset Offset within the binary of the function symbol ++ * @param opts Options for altering program attachment ++ * @return Reference to the newly created BPF link; or NULL is returned on error, ++ * error code is stored in errno ++ */ + LIBBPF_API struct bpf_link * + bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, + const char *binary_path, size_t func_offset, diff --git a/patches.suse/libbpf-Add-documentation-for-bpf_map-batch-operation.patch b/patches.suse/libbpf-Add-documentation-for-bpf_map-batch-operation.patch new file mode 100644 index 0000000..9e3dd75 --- /dev/null +++ b/patches.suse/libbpf-Add-documentation-for-bpf_map-batch-operation.patch @@ -0,0 +1,192 @@ +From: Grant Seltzer +Date: Thu, 6 Jan 2022 15:13:05 -0500 +Subject: libbpf: Add documentation for bpf_map batch operations +Patch-mainline: v5.17-rc1 +Git-commit: e59618f0f46fa6cf86d5b82380e0f453756b282b +References: jsc#PED-1368 + +This adds documention for: + +- bpf_map_delete_batch() +- bpf_map_lookup_batch() +- bpf_map_lookup_and_delete_batch() +- bpf_map_update_batch() + +This also updates the public API for the `keys` parameter +of `bpf_map_delete_batch()`, and both the +`keys` and `values` parameters of `bpf_map_update_batch()` +to be constants. + +Signed-off-by: Grant Seltzer +Signed-off-by: Andrii Nakryiko +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20220106201304.112675-1-grantseltzer@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf.c | 8 +-- + tools/lib/bpf/bpf.h | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 117 insertions(+), 6 deletions(-) + +--- a/tools/lib/bpf/bpf.c ++++ b/tools/lib/bpf/bpf.c +@@ -691,11 +691,11 @@ static int bpf_map_batch_common(int cmd, + return libbpf_err_errno(ret); + } + +-int bpf_map_delete_batch(int fd, void *keys, __u32 *count, ++int bpf_map_delete_batch(int fd, const void *keys, __u32 *count, + const struct bpf_map_batch_opts *opts) + { + return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL, +- NULL, keys, NULL, count, opts); ++ NULL, (void *)keys, NULL, count, opts); + } + + int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys, +@@ -715,11 +715,11 @@ int bpf_map_lookup_and_delete_batch(int + count, opts); + } + +-int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count, ++int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count, + const struct bpf_map_batch_opts *opts) + { + return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL, +- keys, values, count, opts); ++ (void *)keys, (void *)values, count, opts); + } + + int bpf_obj_pin(int fd, const char *pathname) +--- a/tools/lib/bpf/bpf.h ++++ b/tools/lib/bpf/bpf.h +@@ -254,17 +254,128 @@ struct bpf_map_batch_opts { + }; + #define bpf_map_batch_opts__last_field flags + +-LIBBPF_API int bpf_map_delete_batch(int fd, void *keys, ++ ++/** ++ * @brief **bpf_map_delete_batch()** allows for batch deletion of multiple ++ * elements in a BPF map. 
++ * ++ * @param fd BPF map file descriptor ++ * @param keys pointer to an array of *count* keys ++ * @param count input and output parameter; on input **count** represents the ++ * number of elements in the map to delete in batch; ++ * on output if a non-EFAULT error is returned, **count** represents the number of deleted ++ * elements if the output **count** value is not equal to the input **count** value ++ * If EFAULT is returned, **count** should not be trusted to be correct. ++ * @param opts options for configuring the way the batch deletion works ++ * @return 0, on success; negative error code, otherwise (errno is also set to ++ * the error code) ++ */ ++LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys, + __u32 *count, + const struct bpf_map_batch_opts *opts); ++ ++/** ++ * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements. ++ * ++ * The parameter *in_batch* is the address of the first element in the batch to read. ++ * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent ++ * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate ++ * that the batched lookup starts from the beginning of the map. ++ * ++ * The *keys* and *values* are output parameters which must point to memory large enough to ++ * hold *count* items based on the key and value size of the map *map_fd*. The *keys* ++ * buffer must be of *key_size* * *count*. The *values* buffer must be of ++ * *value_size* * *count*. ++ * ++ * @param fd BPF map file descriptor ++ * @param in_batch address of the first element in batch to read, can pass NULL to ++ * indicate that the batched lookup starts from the beginning of the map. ++ * @param out_batch output parameter that should be passed to next call as *in_batch* ++ * @param keys pointer to an array large enough for *count* keys ++ * @param values pointer to an array large enough for *count* values ++ * @param count input and output parameter; on input it's the number of elements ++ * in the map to read in batch; on output it's the number of elements that were ++ * successfully read. ++ * If a non-EFAULT error is returned, count will be set as the number of elements ++ * that were read before the error occurred. ++ * If EFAULT is returned, **count** should not be trusted to be correct. ++ * @param opts options for configuring the way the batch lookup works ++ * @return 0, on success; negative error code, otherwise (errno is also set to ++ * the error code) ++ */ + LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, + void *keys, void *values, __u32 *count, + const struct bpf_map_batch_opts *opts); ++ ++/** ++ * @brief **bpf_map_lookup_and_delete_batch()** allows for batch lookup and deletion ++ * of BPF map elements where each element is deleted after being retrieved. 
++ * ++ * @param fd BPF map file descriptor ++ * @param in_batch address of the first element in batch to read, can pass NULL to ++ * get address of the first element in *out_batch* ++ * @param out_batch output parameter that should be passed to next call as *in_batch* ++ * @param keys pointer to an array of *count* keys ++ * @param values pointer to an array large enough for *count* values ++ * @param count input and output parameter; on input it's the number of elements ++ * in the map to read and delete in batch; on output it represents the number of ++ * elements that were successfully read and deleted ++ * If a non-**EFAULT** error code is returned and if the output **count** value ++ * is not equal to the input **count** value, up to **count** elements may ++ * have been deleted. ++ * if **EFAULT** is returned up to *count* elements may have been deleted without ++ * being returned via the *keys* and *values* output parameters. ++ * @param opts options for configuring the way the batch lookup and delete works ++ * @return 0, on success; negative error code, otherwise (errno is also set to ++ * the error code) ++ */ + LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, + void *out_batch, void *keys, + void *values, __u32 *count, + const struct bpf_map_batch_opts *opts); +-LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values, ++ ++/** ++ * @brief **bpf_map_update_batch()** updates multiple elements in a map ++ * by specifying keys and their corresponding values. ++ * ++ * The *keys* and *values* parameters must point to memory large enough ++ * to hold *count* items based on the key and value size of the map. ++ * ++ * The *opts* parameter can be used to control how *bpf_map_update_batch()* ++ * should handle keys that either do or do not already exist in the map. ++ * In particular the *flags* parameter of *bpf_map_batch_opts* can be ++ * one of the following: ++ * ++ * Note that *count* is an input and output parameter, where on output it ++ * represents how many elements were successfully updated. Also note that if ++ * **EFAULT** then *count* should not be trusted to be correct. ++ * ++ * **BPF_ANY** ++ * Create new elements or update existing. ++ * ++ * **BPF_NOEXIST** ++ * Create new elements only if they do not exist. ++ * ++ * **BPF_EXIST** ++ * Update existing elements. ++ * ++ * **BPF_F_LOCK** ++ * Update spin_lock-ed map elements. This must be ++ * specified if the map value contains a spinlock. ++ * ++ * @param fd BPF map file descriptor ++ * @param keys pointer to an array of *count* keys ++ * @param values pointer to an array of *count* values ++ * @param count input and output parameter; on input it's the number of elements ++ * in the map to update in batch; on output if a non-EFAULT error is returned, ++ * **count** represents the number of updated elements if the output **count** ++ * value is not equal to the input **count** value. ++ * If EFAULT is returned, **count** should not be trusted to be correct. 
++ * @param opts options for configuring the way the batch update works
++ * @return 0, on success; negative error code, otherwise (errno is also set to
++ * the error code)
++ */
++LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values,
+ 				    __u32 *count,
+ 				    const struct bpf_map_batch_opts *opts);
+
diff --git a/patches.suse/libbpf-Add-per-program-log-buffer-setter-and-getter.patch b/patches.suse/libbpf-Add-per-program-log-buffer-setter-and-getter.patch
new file mode 100644
index 0000000..d46a9b2
--- /dev/null
+++ b/patches.suse/libbpf-Add-per-program-log-buffer-setter-and-getter.patch
@@ -0,0 +1,225 @@
+From: Andrii Nakryiko
+Date: Thu, 9 Dec 2021 11:38:35 -0800
+Subject: libbpf: Add per-program log buffer setter and getter
+Patch-mainline: v5.17-rc1
+Git-commit: b3ce907950350a58880b94fed2b6022f160b8b9a
+References: jsc#PED-1368
+
+Allow setting a user-provided log buffer on a per-program basis ([0]). This
+gives a great deal of flexibility in terms of which programs are loaded
+with logging enabled and where corresponding logs go.
+
+Log buffer set with bpf_program__set_log_buf() overrides kernel_log_buf
+and kernel_log_size settings set at bpf_object open time through
+bpf_object_open_opts, if any.
+
+Adjust bpf_object_load_prog_instance() logic to not perform its own log
+buf allocation and load retry if a custom log buffer is provided by the user.
+
+ [0] Closes: https://github.com/libbpf/libbpf/issues/418
+
+Signed-off-by: Andrii Nakryiko
+Signed-off-by: Alexei Starovoitov
+Link: https://lore.kernel.org/bpf/20211209193840.1248570-8-andrii@kernel.org
+Acked-by: Shung-Hsi Yu
+---
+ tools/lib/bpf/libbpf.c | 92 ++++++++++++++++++++++++++++++++++++++---------
+ tools/lib/bpf/libbpf.h | 7 +++
+ tools/lib/bpf/libbpf.map | 2 +
+ 3 files changed, 84 insertions(+), 17 deletions(-)
+
+--- a/tools/lib/bpf/libbpf.c
++++ b/tools/lib/bpf/libbpf.c
+@@ -331,7 +331,11 @@ struct bpf_program {
+
+ 	struct reloc_desc *reloc_desc;
+ 	int nr_reloc;
+-	int log_level;
++
++	/* BPF verifier log settings */
++	char *log_buf;
++	size_t log_size;
++	__u32 log_level;
+
+ 	struct {
+ 		int nr;
+@@ -713,6 +717,9 @@ bpf_object__init_prog(struct bpf_object
+ 	prog->instances.fds = NULL;
+ 	prog->instances.nr = -1;
+
++	/* inherit object's log_level */
++	prog->log_level = obj->log_level;
++
+ 	prog->sec_name = strdup(sec_name);
+ 	if (!prog->sec_name)
+ 		goto errout;
+@@ -6591,8 +6598,10 @@ static int bpf_object_load_prog_instance
+ 	const char *prog_name = NULL;
+ 	char *cp, errmsg[STRERR_BUFSIZE];
+ 	size_t log_buf_size = 0;
+-	char *log_buf = NULL;
++	char *log_buf = NULL, *tmp;
+ 	int btf_fd, ret, err;
++	bool own_log_buf = true;
++	__u32 log_level = prog->log_level;
+
+ 	if (prog->type == BPF_PROG_TYPE_UNSPEC) {
+ 		/*
+@@ -6627,7 +6636,7 @@ static int bpf_object_load_prog_instance
+ 		load_attr.line_info_rec_size = prog->line_info_rec_size;
+ 		load_attr.line_info_cnt = prog->line_info_cnt;
+ 	}
+-	load_attr.log_level = prog->log_level;
++	load_attr.log_level = log_level;
+ 	load_attr.prog_flags = prog->prog_flags;
+ 	load_attr.fd_array = obj->fd_array;
+
+@@ -6648,21 +6657,42 @@ static int bpf_object_load_prog_instance
+ 	*prog_fd = -1;
+ 	return 0;
+ 	}
+-retry_load:
+-	if (log_buf_size) {
+-		log_buf = malloc(log_buf_size);
+-		if (!log_buf)
+-			return -ENOMEM;
+
+-		*log_buf = 0;
++
++retry_load:
++	/* if log_level is zero, we don't request logs initially even if
++	 * custom log_buf is specified; if the program load fails, then we'll
++	 * bump log_level to 1 and use either custom log_buf or we'll allocate
++	 * our own and retry
the load to get details on what failed ++ */ ++ if (log_level) { ++ if (prog->log_buf) { ++ log_buf = prog->log_buf; ++ log_buf_size = prog->log_size; ++ own_log_buf = false; ++ } else if (obj->log_buf) { ++ log_buf = obj->log_buf; ++ log_buf_size = obj->log_size; ++ own_log_buf = false; ++ } else { ++ log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2); ++ tmp = realloc(log_buf, log_buf_size); ++ if (!tmp) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ log_buf = tmp; ++ log_buf[0] = '\0'; ++ own_log_buf = true; ++ } + } + + load_attr.log_buf = log_buf; + load_attr.log_size = log_buf_size; +- ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr); ++ load_attr.log_level = log_level; + ++ ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr); + if (ret >= 0) { +- if (log_buf && load_attr.log_level) { ++ if (log_level && own_log_buf) { + pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", + prog->name, log_buf); + } +@@ -6690,19 +6720,26 @@ retry_load: + goto out; + } + +- if (!log_buf || errno == ENOSPC) { +- log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, +- log_buf_size << 1); +- free(log_buf); ++ if (log_level == 0) { ++ log_level = 1; + goto retry_load; + } ++ /* On ENOSPC, increase log buffer size and retry, unless custom ++ * log_buf is specified. ++ * Be careful to not overflow u32, though. Kernel's log buf size limit ++ * isn't part of UAPI so it can always be bumped to full 4GB. So don't ++ * multiply by 2 unless we are sure we'll fit within 32 bits. ++ * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2). ++ */ ++ if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2) ++ goto retry_load; + + ret = -errno; + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); + pr_perm_msg(ret); + +- if (log_buf && log_buf[0] != '\0') { ++ if (own_log_buf && log_buf && log_buf[0] != '\0') { + pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", + prog->name, log_buf); + } +@@ -6712,7 +6749,8 @@ retry_load: + } + + out: +- free(log_buf); ++ if (own_log_buf) ++ free(log_buf); + return ret; + } + +@@ -8496,6 +8534,26 @@ int bpf_program__set_log_level(struct bp + return 0; + } + ++const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size) ++{ ++ *log_size = prog->log_size; ++ return prog->log_buf; ++} ++ ++int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) ++{ ++ if (log_size && !log_buf) ++ return -EINVAL; ++ if (prog->log_size > UINT_MAX) ++ return -EINVAL; ++ if (prog->obj->loaded) ++ return -EBUSY; ++ ++ prog->log_buf = log_buf; ++ prog->log_size = log_size; ++ return 0; ++} ++ + #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \ + .sec = sec_pfx, \ + .prog_type = BPF_PROG_TYPE_##ptype, \ +--- a/tools/lib/bpf/libbpf.h ++++ b/tools/lib/bpf/libbpf.h +@@ -590,8 +590,15 @@ bpf_program__set_expected_attach_type(st + + LIBBPF_API __u32 bpf_program__flags(const struct bpf_program *prog); + LIBBPF_API int bpf_program__set_flags(struct bpf_program *prog, __u32 flags); ++ ++/* Per-program log level and log buffer getters/setters. ++ * See bpf_object_open_opts comments regarding log_level and log_buf ++ * interactions. 
++ */
+ LIBBPF_API __u32 bpf_program__log_level(const struct bpf_program *prog);
+ LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level);
++LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size);
++LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size);
+
+ LIBBPF_API int
+ bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
+--- a/tools/lib/bpf/libbpf.map
++++ b/tools/lib/bpf/libbpf.map
+@@ -423,6 +423,8 @@ LIBBPF_0.6.0 {
+
+ LIBBPF_0.7.0 {
+ 	global:
+ 		bpf_btf_load;
++		bpf_program__log_buf;
+ 		bpf_program__log_level;
++		bpf_program__set_log_buf;
+ 		bpf_program__set_log_level;
+ };
diff --git a/patches.suse/libbpf-Add-runtime-APIs-to-query-libbpf-version.patch b/patches.suse/libbpf-Add-runtime-APIs-to-query-libbpf-version.patch
new file mode 100644
index 0000000..f949bda
--- /dev/null
+++ b/patches.suse/libbpf-Add-runtime-APIs-to-query-libbpf-version.patch
@@ -0,0 +1,88 @@
+From: Andrii Nakryiko
+Date: Thu, 18 Nov 2021 09:40:54 -0800
+Subject: libbpf: Add runtime APIs to query libbpf version
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+Patch-mainline: v5.17-rc1
+Git-commit: 7615209f42a1976894cd0df97a380a034911656a
+References: jsc#PED-1368
+
+Libbpf provided LIBBPF_MAJOR_VERSION and LIBBPF_MINOR_VERSION macros to
+check the libbpf version at compilation time. This doesn't cover all the
+needs, though, because the version of libbpf that an application is
+compiled against doesn't necessarily match the version of libbpf at
+runtime, especially if libbpf is used as a shared library.
+
+Add libbpf_major_version() and libbpf_minor_version() returning the major
+and minor versions, respectively, as integers. Also add a convenience
+libbpf_version_string() for various tooling using libbpf to print out
+the libbpf version in a human-readable form. Currently it will return
+"v0.6", but in the future it can contain some extra information, so the
+format itself is not part of a stable API and shouldn't be relied upon.
+
+Signed-off-by: Andrii Nakryiko
+Signed-off-by: Daniel Borkmann
+Acked-by: John Fastabend
+Acked-by: Toke Høiland-Jørgensen
+Link: https://lore.kernel.org/bpf/20211118174054.2699477-1-andrii@kernel.org
+Acked-by: Shung-Hsi Yu
+---
+ tools/lib/bpf/libbpf.c | 19 +++++++++++++++++++
+ tools/lib/bpf/libbpf.h | 4 ++++
+ tools/lib/bpf/libbpf.map | 3 +++
+ 3 files changed, 26 insertions(+)
+
+--- a/tools/lib/bpf/libbpf.c
++++ b/tools/lib/bpf/libbpf.c
+@@ -168,6 +168,25 @@ int libbpf_set_strict_mode(enum libbpf_s
+ 	return 0;
+ }
+
++__u32 libbpf_major_version(void)
++{
++	return LIBBPF_MAJOR_VERSION;
++}
++
++__u32 libbpf_minor_version(void)
++{
++	return LIBBPF_MINOR_VERSION;
++}
++
++const char *libbpf_version_string(void)
++{
++#define __S(X) #X
++#define _S(X) __S(X)
++	return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
++#undef _S
++#undef __S
++}
++
+ enum kern_feature_id {
+ 	/* v4.14: kernel support for program & map names.
*/
+ 	FEAT_PROG_NAME,
+--- a/tools/lib/bpf/libbpf.h
++++ b/tools/lib/bpf/libbpf.h
+@@ -24,6 +24,10 @@
+ extern "C" {
+ #endif
+
++LIBBPF_API __u32 libbpf_major_version(void);
++LIBBPF_API __u32 libbpf_minor_version(void);
++LIBBPF_API const char *libbpf_version_string(void);
++
+ enum libbpf_errno {
+ 	__LIBBPF_ERRNO__START = 4000,
+
+--- a/tools/lib/bpf/libbpf.map
++++ b/tools/lib/bpf/libbpf.map
+@@ -410,6 +410,9 @@ LIBBPF_0.6.0 {
+ 		btf__type_cnt;
+ 		btf_dump__new;
+ 		btf_dump__new_deprecated;
++		libbpf_major_version;
++		libbpf_minor_version;
++		libbpf_version_string;
+ 		perf_buffer__new;
+ 		perf_buffer__new_deprecated;
+ 		perf_buffer__new_raw;
diff --git a/patches.suse/libbpf-Add-sane-strncpy-alternative-and-use-it-inter.patch b/patches.suse/libbpf-Add-sane-strncpy-alternative-and-use-it-inter.patch
new file mode 100644
index 0000000..342c6a7
--- /dev/null
+++ b/patches.suse/libbpf-Add-sane-strncpy-alternative-and-use-it-inter.patch
@@ -0,0 +1,178 @@
+From: Andrii Nakryiko
+Date: Fri, 10 Dec 2021 16:40:43 -0800
+Subject: libbpf: Add sane strncpy alternative and use it internally
+Patch-mainline: v5.17-rc1
+Git-commit: 9fc205b413b3f3e9502fa92151fba63b91230454
+References: jsc#PED-1368
+
+strncpy() has notoriously error-prone semantics which makes GCC
+complain about it a lot (and quite often completely falsely
+at that). Instead of pleasing GCC all the time (-Wno-stringop-truncation
+is unfortunately only supported by GCC, so it's a bit too messy to just
+enable it in Makefile), add libbpf-internal libbpf_strlcpy() helper
+which follows what FreeBSD's strlcpy() does and what most people would
+expect from strncpy(): copies up to the first N-1 bytes from the source
+string into the destination string and ensures zero-termination afterwards.
+
+Replace all the relevant uses of strncpy/strncat/memcpy in libbpf with
+libbpf_strlcpy().
+
+This also fixes the issue reported by Emmanuel Deloget in xsk.c where
+memcpy() could access source string beyond its end.
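+
+To illustrate the intended semantics, a small sketch (not part of the
+patch):
+
+	char dst[4];
+
+	libbpf_strlcpy(dst, "libbpf", sizeof(dst));
+	/* dst is now "lib" plus a terminating NUL; strncpy() would have
+	 * filled all four bytes and left dst without a NUL terminator
+	 */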
+ +Fixes: 2f6324a3937f8 (libbpf: Support shared umems between queues and devices) +Reported-by: Emmanuel Deloget +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211211004043.2374068-1-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf.c | 4 ++-- + tools/lib/bpf/btf_dump.c | 4 ++-- + tools/lib/bpf/gen_loader.c | 6 ++---- + tools/lib/bpf/libbpf.c | 8 +++----- + tools/lib/bpf/libbpf_internal.h | 19 +++++++++++++++++++ + tools/lib/bpf/xsk.c | 9 +++------ + 6 files changed, 31 insertions(+), 19 deletions(-) + +--- a/tools/lib/bpf/bpf.c ++++ b/tools/lib/bpf/bpf.c +@@ -112,7 +112,7 @@ int bpf_map_create(enum bpf_map_type map + + attr.map_type = map_type; + if (map_name) +- strncat(attr.map_name, map_name, sizeof(attr.map_name) - 1); ++ libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); + attr.key_size = key_size; + attr.value_size = value_size; + attr.max_entries = max_entries; +@@ -271,7 +271,7 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_t + attr.kern_version = OPTS_GET(opts, kern_version, 0); + + if (prog_name) +- strncat(attr.prog_name, prog_name, sizeof(attr.prog_name) - 1); ++ libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); + attr.license = ptr_to_u64(license); + + if (insn_cnt > UINT_MAX) +--- a/tools/lib/bpf/btf_dump.c ++++ b/tools/lib/bpf/btf_dump.c +@@ -2321,8 +2321,8 @@ int btf_dump__dump_type_data(struct btf_ + if (!opts->indent_str) + d->typed_dump->indent_str[0] = '\t'; + else +- strncat(d->typed_dump->indent_str, opts->indent_str, +- sizeof(d->typed_dump->indent_str) - 1); ++ libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str, ++ sizeof(d->typed_dump->indent_str)); + + d->typed_dump->compact = OPTS_GET(opts, compact, false); + d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false); +--- a/tools/lib/bpf/gen_loader.c ++++ b/tools/lib/bpf/gen_loader.c +@@ -463,8 +463,7 @@ void bpf_gen__map_create(struct bpf_gen + attr.map_flags = map_attr->map_flags; + attr.map_extra = map_attr->map_extra; + if (map_name) +- memcpy(attr.map_name, map_name, +- min((unsigned)strlen(map_name), BPF_OBJ_NAME_LEN - 1)); ++ libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); + attr.numa_node = map_attr->numa_node; + attr.map_ifindex = map_attr->map_ifindex; + attr.max_entries = max_entries; +@@ -970,8 +969,7 @@ void bpf_gen__prog_load(struct bpf_gen * + core_relos = add_data(gen, gen->core_relos, + attr.core_relo_cnt * attr.core_relo_rec_size); + +- memcpy(attr.prog_name, prog_name, +- min((unsigned)strlen(prog_name), BPF_OBJ_NAME_LEN - 1)); ++ libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); + prog_load_attr = add_data(gen, &attr, attr_size); + + /* populate union bpf_attr with a pointer to license */ +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -1201,12 +1201,10 @@ static struct bpf_object *bpf_object__ne + + strcpy(obj->path, path); + if (obj_name) { +- strncpy(obj->name, obj_name, sizeof(obj->name) - 1); +- obj->name[sizeof(obj->name) - 1] = 0; ++ libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name)); + } else { + /* Using basename() GNU version which doesn't modify arg. 
*/ +- strncpy(obj->name, basename((void *)path), +- sizeof(obj->name) - 1); ++ libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name)); + end = strchr(obj->name, '.'); + if (end) + *end = 0; +@@ -1358,7 +1356,7 @@ static int bpf_object__check_endianness( + static int + bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) + { +- memcpy(obj->license, data, min(size, sizeof(obj->license) - 1)); ++ libbpf_strlcpy(obj->license, data, sizeof(obj->license)); + pr_debug("license of %s is %s\n", obj->path, obj->license); + return 0; + } +--- a/tools/lib/bpf/libbpf_internal.h ++++ b/tools/lib/bpf/libbpf_internal.h +@@ -169,6 +169,25 @@ static inline void *libbpf_reallocarray( + return realloc(ptr, total); + } + ++/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst ++ * is zero-terminated string no matter what (unless sz == 0, in which case ++ * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs ++ * in what is returned. Given this is internal helper, it's trivial to extend ++ * this, when necessary. Use this instead of strncpy inside libbpf source code. ++ */ ++static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz) ++{ ++ size_t i; ++ ++ if (sz == 0) ++ return; ++ ++ sz--; ++ for (i = 0; i < sz && src[i]; i++) ++ dst[i] = src[i]; ++ dst[i] = '\0'; ++} ++ + struct btf; + struct btf_type; + +--- a/tools/lib/bpf/xsk.c ++++ b/tools/lib/bpf/xsk.c +@@ -548,8 +548,7 @@ static int xsk_get_max_queues(struct xsk + return -errno; + + ifr.ifr_data = (void *)&channels; +- memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1); +- ifr.ifr_name[IFNAMSIZ - 1] = '\0'; ++ libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ); + err = ioctl(fd, SIOCETHTOOL, &ifr); + if (err && errno != EOPNOTSUPP) { + ret = -errno; +@@ -768,8 +767,7 @@ static int xsk_create_xsk_struct(int ifi + } + + ctx->ifindex = ifindex; +- memcpy(ctx->ifname, ifname, IFNAMSIZ -1); +- ctx->ifname[IFNAMSIZ - 1] = 0; ++ libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); + + xsk->ctx = ctx; + xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); +@@ -951,8 +949,7 @@ static struct xsk_ctx *xsk_create_ctx(st + ctx->refcount = 1; + ctx->umem = umem; + ctx->queue_id = queue_id; +- memcpy(ctx->ifname, ifname, IFNAMSIZ - 1); +- ctx->ifname[IFNAMSIZ - 1] = '\0'; ++ libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); + + ctx->fill = fill; + ctx->comp = comp; diff --git a/patches.suse/libbpf-Allow-passing-preallocated-log_buf-when-loadi.patch b/patches.suse/libbpf-Allow-passing-preallocated-log_buf-when-loadi.patch new file mode 100644 index 0000000..c3e0668 --- /dev/null +++ b/patches.suse/libbpf-Allow-passing-preallocated-log_buf-when-loadi.patch @@ -0,0 +1,182 @@ +From: Andrii Nakryiko +Date: Thu, 9 Dec 2021 11:38:31 -0800 +Subject: libbpf: Allow passing preallocated log_buf when loading BTF into + kernel +Patch-mainline: v5.17-rc1 +Git-commit: 1a190d1e8eb9ff84354e38f7482dc77b626f3cc9 +References: jsc#PED-1368 + +Add libbpf-internal btf_load_into_kernel() that allows a +preallocated log_buf and custom log_level to be passed into the kernel +during BPF_BTF_LOAD call. When custom log_buf is provided, +btf_load_into_kernel() won't attempt a retry with automatically +allocated internal temporary buffer to capture BTF validation log. + +It's important to note the relation between log_buf and log_level, which +slightly deviates from stricter kernel logic. From kernel's POV, if +log_buf is specified, log_level has to be > 0, and vice versa.
While +kernel has good reasons to request such "sanity", this, in practice, is +a bit inconvenient and restrictive for libbpf's high-level bpf_object APIs. + +So libbpf will allow setting non-NULL log_buf and log_level == 0. This is +fine and means to attempt to load BTF without logging requested, but if +it fails, retry the load with custom log_buf and log_level 1. Similar +logic will be implemented for program loading. In practice this means +that users can provide custom log buffer just in case error happens, but +not really request slower verbose logging all the time. This is also +consistent with libbpf behavior when custom log_buf is not set: libbpf +first tries to load everything with log_level=0, and only if error +happens allocates internal log buffer and retries with log_level=1. + +Also, while at it, make BTF validation log more obvious and follow the log +pattern libbpf is using for dumping BPF verifier log during +BPF_PROG_LOAD. BTF loading resulting in an error will look like this: + +libbpf: BTF loading error: -22 +libbpf: -- BEGIN BTF LOAD LOG --- +magic: 0xeb9f +version: 1 +flags: 0x0 +hdr_len: 24 +type_off: 0 +type_len: 1040 +str_off: 1040 +str_len: 2063598257 +btf_total_size: 1753 +Total section length too long +-- END BTF LOAD LOG -- +libbpf: Error loading .BTF into kernel: -22. BTF is optional, ignoring. + +This makes it much easier to find relevant parts in libbpf log output. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211209193840.1248570-4-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/btf.c | 78 ++++++++++++++++++++++++++++------------ + tools/lib/bpf/libbpf_internal.h | 1 + 2 files changed, 56 insertions(+), 23 deletions(-) + +--- a/tools/lib/bpf/btf.c ++++ b/tools/lib/bpf/btf.c +@@ -1124,54 +1124,86 @@ struct btf *btf__parse_split(const char + + static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); + +-int btf__load_into_kernel(struct btf *btf) ++int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) + { +- __u32 log_buf_size = 0, raw_size; +- char *log_buf = NULL; ++ LIBBPF_OPTS(bpf_btf_load_opts, opts); ++ __u32 buf_sz = 0, raw_size; ++ char *buf = NULL, *tmp; + void *raw_data; + int err = 0; + + if (btf->fd >= 0) + return libbpf_err(-EEXIST); ++ if (log_sz && !log_buf) ++ return libbpf_err(-EINVAL); + +-retry_load: +- if (log_buf_size) { +- log_buf = malloc(log_buf_size); +- if (!log_buf) +- return libbpf_err(-ENOMEM); +- +- *log_buf = 0; +- } +- ++ /* cache native raw data representation */ + raw_data = btf_get_raw_data(btf, &raw_size, false); + if (!raw_data) { + err = -ENOMEM; + goto done; + } +- /* cache native raw data representation */ + btf->raw_size = raw_size; + btf->raw_data = raw_data; + ++retry_load: ++ /* if log_level is 0, we won't provide log_buf/log_size to the kernel, ++ * initially. Only if BTF loading fails, we bump log_level to 1 and ++ * retry, using either auto-allocated or custom log_buf. This way ++ * non-NULL custom log_buf provides a buffer just in case, but hopes ++ * for successful load and no need for log_buf.
++ */ ++ if (log_level) { ++ /* if caller didn't provide custom log_buf, we'll keep ++ * allocating our own progressively bigger buffers for BTF ++ * verification log ++ */ ++ if (!log_buf) { ++ buf_sz = max((__u32)BPF_LOG_BUF_SIZE, buf_sz * 2); ++ tmp = realloc(buf, buf_sz); ++ if (!tmp) { ++ err = -ENOMEM; ++ goto done; ++ } ++ buf = tmp; ++ buf[0] = '\0'; ++ } ++ ++ opts.log_buf = log_buf ? log_buf : buf; ++ opts.log_size = log_buf ? log_sz : buf_sz; ++ opts.log_level = log_level; ++ } ++ ++ btf->fd = bpf_btf_load(raw_data, raw_size, &opts); + if (btf->fd < 0) { +- if (!log_buf || errno == ENOSPC) { +- log_buf_size = max((__u32)BPF_LOG_BUF_SIZE, +- log_buf_size << 1); +- free(log_buf); ++ /* time to turn on verbose mode and try again */ ++ if (log_level == 0) { ++ log_level = 1; + goto retry_load; + } ++ /* only retry if caller didn't provide custom log_buf, but ++ * make sure we can never overflow buf_sz ++ */ ++ if (!log_buf && errno == ENOSPC && buf_sz <= UINT_MAX / 2) ++ goto retry_load; + + err = -errno; +- pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno); +- if (*log_buf) +- pr_warn("%s\n", log_buf); +- goto done; ++ pr_warn("BTF loading error: %d\n", err); ++ /* don't print out contents of custom log_buf */ ++ if (!log_buf && buf[0]) ++ pr_warn("-- BEGIN BTF LOAD LOG ---\n%s\n-- END BTF LOAD LOG --\n", buf); + } + + done: +- free(log_buf); ++ free(buf); + return libbpf_err(err); + } ++ ++int btf__load_into_kernel(struct btf *btf) ++{ ++ return btf_load_into_kernel(btf, NULL, 0, 0); ++} ++ + int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel"))); + + int btf__fd(const struct btf *btf) +--- a/tools/lib/bpf/libbpf_internal.h ++++ b/tools/lib/bpf/libbpf_internal.h +@@ -277,6 +277,7 @@ int parse_cpu_mask_str(const char *s, bo + int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); + int libbpf__load_raw_btf(const char *raw_types, size_t types_len, + const char *str_sec, size_t str_len); ++int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); + + struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); + void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, diff --git a/patches.suse/libbpf-Allow-passing-user-log-setting-through-bpf_ob.patch b/patches.suse/libbpf-Allow-passing-user-log-setting-through-bpf_ob.patch new file mode 100644 index 0000000..944f3f4 --- /dev/null +++ b/patches.suse/libbpf-Allow-passing-user-log-setting-through-bpf_ob.patch @@ -0,0 +1,153 @@ +From: Andrii Nakryiko +Date: Thu, 9 Dec 2021 11:38:32 -0800 +Subject: libbpf: Allow passing user log setting through bpf_object_open_opts +Patch-mainline: v5.17-rc1 +Git-commit: e0e3ea888c69b4ea17133b8ac8dfd5066a759b5a +References: jsc#PED-1368 + +Allow users to provide their own custom log_buf, log_size, and log_level +at bpf_object level through bpf_object_open_opts. This log_buf will be +used during BTF loading. Subsequent patch will use same log_buf during +BPF program loading, unless overriden at per-bpf_program level. + +When such custom log_buf is provided, libbpf won't be attempting +retrying loading of BTF to try to provide its own log buffer to capture +kernel's error log output. User is responsible to provide big enough +buffer, otherwise they run a risk of getting -ENOSPC error from the +bpf() syscall. + +See also comments in bpf_object_open_opts regarding log_level and +log_buf interactions. 
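A minimal caller-side sketch of these options (the object file name and buffer size are illustrative assumptions, not taken from the patch):

	static char log_buf[64 * 1024];

	LIBBPF_OPTS(bpf_object_open_opts, opts,
		.kernel_log_buf = log_buf,
		.kernel_log_size = sizeof(log_buf),
		/* 0 = quiet on success; libbpf retries with level 1 on failure */
		.kernel_log_level = 0,
	);

	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);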
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211209193840.1248570-5-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf.h | 3 ++- + tools/lib/bpf/libbpf.c | 24 +++++++++++++++++++++++- + tools/lib/bpf/libbpf.h | 41 ++++++++++++++++++++++++++++++++++++++++- + 3 files changed, 65 insertions(+), 3 deletions(-) + +--- a/tools/lib/bpf/bpf.h ++++ b/tools/lib/bpf/bpf.h +@@ -195,8 +195,9 @@ struct bpf_load_program_attr { + /* Flags to direct loading requirements */ + #define MAPS_RELAX_COMPAT 0x01 + +-/* Recommend log buffer size */ ++/* Recommended log buffer size */ + #define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */ ++ + LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") + LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, + char *log_buf, size_t log_buf_sz); +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -573,6 +573,11 @@ struct bpf_object { + size_t btf_module_cnt; + size_t btf_module_cap; + ++ /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */ ++ char *log_buf; ++ size_t log_size; ++ __u32 log_level; ++ + void *priv; + bpf_object_clear_priv_t clear_priv; + +@@ -3017,7 +3022,9 @@ static int bpf_object__sanitize_and_load + */ + btf__set_fd(kern_btf, 0); + } else { +- err = btf__load_into_kernel(kern_btf); ++ /* currently BPF_BTF_LOAD only supports log_level 1 */ ++ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, ++ obj->log_level ? 1 : 0); + } + if (sanitize) { + if (!err) { +@@ -6932,6 +6939,9 @@ __bpf_object__open(const char *path, con + struct bpf_object *obj; + char tmp_name[64]; + int err; ++ char *log_buf; ++ size_t log_size; ++ __u32 log_level; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warn("failed to init libelf for %s\n", +@@ -6954,10 +6964,22 @@ __bpf_object__open(const char *path, con + pr_debug("loading object '%s' from buffer\n", obj_name); + } + ++ log_buf = OPTS_GET(opts, kernel_log_buf, NULL); ++ log_size = OPTS_GET(opts, kernel_log_size, 0); ++ log_level = OPTS_GET(opts, kernel_log_level, 0); ++ if (log_size > UINT_MAX) ++ return ERR_PTR(-EINVAL); ++ if (log_size && !log_buf) ++ return ERR_PTR(-EINVAL); ++ + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); + if (IS_ERR(obj)) + return obj; + ++ obj->log_buf = log_buf; ++ obj->log_size = log_size; ++ obj->log_level = log_level; ++ + btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); + if (btf_tmp_path) { + if (strlen(btf_tmp_path) >= PATH_MAX) { +--- a/tools/lib/bpf/libbpf.h ++++ b/tools/lib/bpf/libbpf.h +@@ -108,8 +108,47 @@ struct bpf_object_open_opts { + * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux. + */ + const char *btf_custom_path; ++ /* Pointer to a buffer for storing kernel logs for applicable BPF ++ * commands. Valid kernel_log_size has to be specified as well and are ++ * passed-through to bpf() syscall. Keep in mind that kernel might ++ * fail operation with -ENOSPC error if provided buffer is too small ++ * to contain entire log output. ++ * See the comment below for kernel_log_level for interaction between ++ * log_buf and log_level settings. ++ * ++ * If specified, this log buffer will be passed for: ++ * - each BPF progral load (BPF_PROG_LOAD) attempt, unless overriden ++ * with bpf_program__set_log() on per-program level, to get ++ * BPF verifier log output. 
++ * - during BPF object's BTF load into kernel (BPF_BTF_LOAD) to get ++ * BTF sanity checking log. ++ * ++ * Each BPF command (BPF_BTF_LOAD or BPF_PROG_LOAD) will overwrite ++ * previous contents, so if you need more fine-grained control, set ++ * per-program buffer with bpf_program__set_log_buf() to preserve each ++ * individual program's verification log. Keep using kernel_log_buf ++ * for BTF verification log, if necessary. ++ */ ++ char *kernel_log_buf; ++ size_t kernel_log_size; ++ /* ++ * Log level can be set independently from log buffer. Log_level=0 ++ * means that libbpf will attempt loading BTF or program without any ++ * logging requested, but will retry with either its own or custom log ++ * buffer, if provided, and log_level=1 on any error. ++ * And vice versa, setting log_level>0 will request BTF or prog ++ * loading with verbose log from the first attempt (and as such also ++ * for successfully loaded BTF or program), and the actual log buffer ++ * could be either libbpf's own auto-allocated log buffer, if ++ * kernel_log_buffer is NULL, or user-provided custom kernel_log_buf. ++ * If user didn't provide custom log buffer, libbpf will emit captured ++ * logs through its print callback. ++ */ ++ __u32 kernel_log_level; ++ ++ size_t :0; + }; +-#define bpf_object_open_opts__last_field btf_custom_path ++#define bpf_object_open_opts__last_field kernel_log_level + + LIBBPF_API struct bpf_object *bpf_object__open(const char *path); + diff --git a/patches.suse/libbpf-Auto-bump-RLIMIT_MEMLOCK-if-kernel-needs-it-f.patch b/patches.suse/libbpf-Auto-bump-RLIMIT_MEMLOCK-if-kernel-needs-it-f.patch new file mode 100644 index 0000000..c833a4d --- /dev/null +++ b/patches.suse/libbpf-Auto-bump-RLIMIT_MEMLOCK-if-kernel-needs-it-f.patch @@ -0,0 +1,349 @@ +From: Andrii Nakryiko +Date: Tue, 14 Dec 2021 11:59:03 -0800 +Subject: libbpf: Auto-bump RLIMIT_MEMLOCK if kernel needs it for BPF +Patch-mainline: v5.17-rc1 +Git-commit: e542f2c4cd16d49392abf3349341d58153d3c603 +References: jsc#PED-1368 + +The need to increase RLIMIT_MEMLOCK to do anything useful with BPF is +one of the first extremely frustrating gotchas that all new BPF users go +through and in some cases have to learn it a very hard way. + +Luckily, starting with upstream Linux kernel version 5.11, BPF subsystem +dropped the dependency on memlock and uses memcg-based memory accounting +instead. Unfortunately, detecting memcg-based BPF memory accounting is +far from trivial (as can be evidenced by this patch), so in practice +most BPF applications still do unconditional RLIMIT_MEMLOCK increase. + +As we move towards libbpf 1.0, it would be good to allow users to forget +about RLIMIT_MEMLOCK vs memcg and let libbpf do the sensible adjustment +automatically. This patch paves the way forward in this matter. Libbpf +will do feature detection of memcg-based accounting, and if detected, +will do nothing. But if the kernel is too old, just like BCC, libbpf +will automatically increase RLIMIT_MEMLOCK on behalf of user +application ([0]). + +As this is technically a breaking change, during the transition period +applications have to opt into libbpf 1.0 mode by setting +LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK bit when calling +libbpf_set_strict_mode(). + +Libbpf allows to control the exact amount of set RLIMIT_MEMLOCK limit +with libbpf_set_memlock_rlim_max() API. Passing 0 will make libbpf do +nothing with RLIMIT_MEMLOCK. 
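A minimal opt-in sketch follows (note a naming wrinkle: the commit message and libbpf.map refer to libbpf_set_memlock_rlim_max(), while the declaration this patch adds to bpf.h is libbpf_set_memlock_rlim(); the sketch uses the declared name, and the limit value is illustrative):

	/* opt into libbpf 1.0 behavior: auto-bump RLIMIT_MEMLOCK on kernels
	 * that lack memcg-based memory accounting for BPF
	 */
	libbpf_set_strict_mode(LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK);

	/* optional: cap the bumped limit instead of the default RLIM_INFINITY */
	libbpf_set_memlock_rlim(128 * 1024 * 1024);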
libbpf_set_memlock_rlim_max() has to be +called before the first bpf_prog_load(), bpf_btf_load(), or +bpf_object__load() call, otherwise it has no effect and will return +-EBUSY. + + [0] Closes: https://github.com/libbpf/libbpf/issues/369 + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211214195904.1785155-2-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf.c | 81 ++++++++++++++++++++++++++++++++++++++++ + tools/lib/bpf/bpf.h | 2 + tools/lib/bpf/libbpf.c | 47 ++++------------------- + tools/lib/bpf/libbpf.map | 1 + tools/lib/bpf/libbpf_internal.h | 39 +++++++++++++++++++ + tools/lib/bpf/libbpf_legacy.h | 12 +++++ + 6 files changed, 143 insertions(+), 39 deletions(-) + +--- a/tools/lib/bpf/bpf.c ++++ b/tools/lib/bpf/bpf.c +@@ -28,7 +28,9 @@ + #include + #include + #include ++#include + #include ++#include + #include "bpf.h" + #include "libbpf.h" + #include "libbpf_internal.h" +@@ -94,6 +96,77 @@ static inline int sys_bpf_prog_load(unio + return fd; + } + ++/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to ++ * memcg-based memory accounting for BPF maps and progs. This was done in [0]. ++ * We use the support for bpf_ktime_get_coarse_ns() helper, which was added in ++ * the same 5.11 Linux release ([1]), to detect memcg-based accounting for BPF. ++ * ++ * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ ++ * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") ++ */ ++int probe_memcg_account(void) ++{ ++ const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); ++ struct bpf_insn insns[] = { ++ BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns), ++ BPF_EXIT_INSN(), ++ }; ++ size_t insn_cnt = sizeof(insns) / sizeof(insns[0]); ++ union bpf_attr attr; ++ int prog_fd; ++ ++ /* attempt loading freplace trying to use custom BTF */ ++ memset(&attr, 0, prog_load_attr_sz); ++ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; ++ attr.insns = ptr_to_u64(insns); ++ attr.insn_cnt = insn_cnt; ++ attr.license = ptr_to_u64("GPL"); ++ ++ prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz); ++ if (prog_fd >= 0) { ++ close(prog_fd); ++ return 1; ++ } ++ return 0; ++} ++ ++static bool memlock_bumped; ++static rlim_t memlock_rlim = RLIM_INFINITY; ++ ++int libbpf_set_memlock_rlim(size_t memlock_bytes) ++{ ++ if (memlock_bumped) ++ return libbpf_err(-EBUSY); ++ ++ memlock_rlim = memlock_bytes; ++ return 0; ++} ++ ++int bump_rlimit_memlock(void) ++{ ++ struct rlimit rlim; ++ ++ /* this the default in libbpf 1.0, but for now user has to opt-in explicitly */ ++ if (!(libbpf_mode & LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK)) ++ return 0; ++ ++ /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ ++ if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) ++ return 0; ++ ++ memlock_bumped = true; ++ ++ /* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */ ++ if (memlock_rlim == 0) ++ return 0; ++ ++ rlim.rlim_cur = rlim.rlim_max = memlock_rlim; ++ if (setrlimit(RLIMIT_MEMLOCK, &rlim)) ++ return -errno; ++ ++ return 0; ++} ++ + int bpf_map_create(enum bpf_map_type map_type, + const char *map_name, + __u32 key_size, +@@ -105,6 +178,8 @@ int bpf_map_create(enum bpf_map_type map + union bpf_attr attr; + int fd; + ++ bump_rlimit_memlock(); ++ + memset(&attr, 0, attr_sz); + + if (!OPTS_VALID(opts, bpf_map_create_opts)) +@@ -251,6 +326,8 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_t + union bpf_attr attr; + char *log_buf; + ++ bump_rlimit_memlock(); 
++ + if (!OPTS_VALID(opts, bpf_prog_load_opts)) + return libbpf_err(-EINVAL); + +@@ -456,6 +533,8 @@ int bpf_verify_program(enum bpf_prog_typ + union bpf_attr attr; + int fd; + ++ bump_rlimit_memlock(); ++ + memset(&attr, 0, sizeof(attr)); + attr.prog_type = type; + attr.insn_cnt = (__u32)insns_cnt; +@@ -1056,6 +1135,8 @@ int bpf_btf_load(const void *btf_data, s + __u32 log_level; + int fd; + ++ bump_rlimit_memlock(); ++ + memset(&attr, 0, attr_sz); + + if (!OPTS_VALID(opts, bpf_btf_load_opts)) +--- a/tools/lib/bpf/bpf.h ++++ b/tools/lib/bpf/bpf.h +@@ -35,6 +35,8 @@ + extern "C" { + #endif + ++int libbpf_set_memlock_rlim(size_t memlock_bytes); ++ + struct bpf_map_create_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -187,42 +187,6 @@ const char *libbpf_version_string(void) + #undef __S + } + +-enum kern_feature_id { +- /* v4.14: kernel support for program & map names. */ +- FEAT_PROG_NAME, +- /* v5.2: kernel support for global data sections. */ +- FEAT_GLOBAL_DATA, +- /* BTF support */ +- FEAT_BTF, +- /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ +- FEAT_BTF_FUNC, +- /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ +- FEAT_BTF_DATASEC, +- /* BTF_FUNC_GLOBAL is supported */ +- FEAT_BTF_GLOBAL_FUNC, +- /* BPF_F_MMAPABLE is supported for arrays */ +- FEAT_ARRAY_MMAP, +- /* kernel support for expected_attach_type in BPF_PROG_LOAD */ +- FEAT_EXP_ATTACH_TYPE, +- /* bpf_probe_read_{kernel,user}[_str] helpers */ +- FEAT_PROBE_READ_KERN, +- /* BPF_PROG_BIND_MAP is supported */ +- FEAT_PROG_BIND_MAP, +- /* Kernel support for module BTFs */ +- FEAT_MODULE_BTF, +- /* BTF_KIND_FLOAT support */ +- FEAT_BTF_FLOAT, +- /* BPF perf link support */ +- FEAT_PERF_LINK, +- /* BTF_KIND_DECL_TAG support */ +- FEAT_BTF_DECL_TAG, +- /* BTF_KIND_TYPE_TAG support */ +- FEAT_BTF_TYPE_TAG, +- __FEAT_CNT, +-}; +- +-static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); +- + enum reloc_type { + RELO_LD64, + RELO_CALL, +@@ -4352,6 +4316,10 @@ bpf_object__probe_loading(struct bpf_obj + if (obj->gen_loader) + return 0; + ++ ret = bump_rlimit_memlock(); ++ if (ret) ++ pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); ++ + /* make sure basic loading works */ + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + if (ret < 0) +@@ -4718,14 +4686,17 @@ static struct kern_feature_desc { + [FEAT_BTF_TYPE_TAG] = { + "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, + }, ++ [FEAT_MEMCG_ACCOUNT] = { ++ "memcg-based memory accounting", probe_memcg_account, ++ }, + }; + +-static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) ++bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) + { + struct kern_feature_desc *feat = &feature_probes[feat_id]; + int ret; + +- if (obj->gen_loader) ++ if (obj && obj->gen_loader) + /* To generate loader program assume the latest kernel + * to avoid doing extra prog_load, map_create syscalls. 
+ */ +--- a/tools/lib/bpf/libbpf.map ++++ b/tools/lib/bpf/libbpf.map +@@ -427,4 +427,5 @@ LIBBPF_0.7.0 { + bpf_program__log_level; + bpf_program__set_log_buf; + bpf_program__set_log_level; ++ libbpf_set_memlock_rlim_max; + }; +--- a/tools/lib/bpf/libbpf_internal.h ++++ b/tools/lib/bpf/libbpf_internal.h +@@ -291,6 +291,45 @@ static inline bool libbpf_validate_opts( + (opts)->sz - __off); \ + }) + ++enum kern_feature_id { ++ /* v4.14: kernel support for program & map names. */ ++ FEAT_PROG_NAME, ++ /* v5.2: kernel support for global data sections. */ ++ FEAT_GLOBAL_DATA, ++ /* BTF support */ ++ FEAT_BTF, ++ /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ ++ FEAT_BTF_FUNC, ++ /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ ++ FEAT_BTF_DATASEC, ++ /* BTF_FUNC_GLOBAL is supported */ ++ FEAT_BTF_GLOBAL_FUNC, ++ /* BPF_F_MMAPABLE is supported for arrays */ ++ FEAT_ARRAY_MMAP, ++ /* kernel support for expected_attach_type in BPF_PROG_LOAD */ ++ FEAT_EXP_ATTACH_TYPE, ++ /* bpf_probe_read_{kernel,user}[_str] helpers */ ++ FEAT_PROBE_READ_KERN, ++ /* BPF_PROG_BIND_MAP is supported */ ++ FEAT_PROG_BIND_MAP, ++ /* Kernel support for module BTFs */ ++ FEAT_MODULE_BTF, ++ /* BTF_KIND_FLOAT support */ ++ FEAT_BTF_FLOAT, ++ /* BPF perf link support */ ++ FEAT_PERF_LINK, ++ /* BTF_KIND_DECL_TAG support */ ++ FEAT_BTF_DECL_TAG, ++ /* BTF_KIND_TYPE_TAG support */ ++ FEAT_BTF_TYPE_TAG, ++ /* memcg-based accounting for BPF maps and progs */ ++ FEAT_MEMCG_ACCOUNT, ++ __FEAT_CNT, ++}; ++ ++int probe_memcg_account(void); ++bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); ++int bump_rlimit_memlock(void); + + int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); + int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); +--- a/tools/lib/bpf/libbpf_legacy.h ++++ b/tools/lib/bpf/libbpf_legacy.h +@@ -45,7 +45,6 @@ enum libbpf_strict_mode { + * (positive) error code. + */ + LIBBPF_STRICT_DIRECT_ERRS = 0x02, +- + /* + * Enforce strict BPF program section (SEC()) names. + * E.g., while prefiously SEC("xdp_whatever") or SEC("perf_event_blah") were +@@ -63,6 +62,17 @@ enum libbpf_strict_mode { + * Clients can maintain it on their own if it is valuable for them. + */ + LIBBPF_STRICT_NO_OBJECT_LIST = 0x08, ++ /* ++ * Automatically bump RLIMIT_MEMLOCK using setrlimit() before the ++ * first BPF program or map creation operation. This is done only if ++ * kernel is too old to support memcg-based memory accounting for BPF ++ * subsystem. By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY, ++ * but it can be overriden with libbpf_set_memlock_rlim_max() API. ++ * Note that libbpf_set_memlock_rlim_max() needs to be called before ++ * the very first bpf_prog_load(), bpf_map_create() or bpf_object__load() ++ * operation. 
++ */ ++ LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10, + + __LIBBPF_STRICT_LAST, + }; diff --git a/patches.suse/libbpf-Avoid-double-stores-for-success-failure-case-.patch b/patches.suse/libbpf-Avoid-double-stores-for-success-failure-case-.patch new file mode 100644 index 0000000..66ca5c4 --- /dev/null +++ b/patches.suse/libbpf-Avoid-double-stores-for-success-failure-case-.patch @@ -0,0 +1,92 @@ +From: Kumar Kartikeya Dwivedi +Date: Tue, 23 Nov 2021 05:27:32 +0530 +Subject: libbpf: Avoid double stores for success/failure case of ksym + relocations +Patch-mainline: v5.17-rc1 +Git-commit: 0270090d396a8e7e7f42adae13fdfa48ffb85144 +References: jsc#PED-1368 + +Instead, jump directly to success case stores in case ret >= 0, else do +the default 0 value store and jump over the success case. This is better +in terms of readability. Readjust the code for kfunc relocation as well +to follow a similar pattern, also leads to easier to follow code now. + +Suggested-by: Alexei Starovoitov +Signed-off-by: Kumar Kartikeya Dwivedi +Signed-off-by: Andrii Nakryiko +Acked-by: Song Liu +Link: https://lore.kernel.org/bpf/20211122235733.634914-3-memxor@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/gen_loader.c | 37 +++++++++++++++++++++---------------- + 1 file changed, 21 insertions(+), 16 deletions(-) + +--- a/tools/lib/bpf/gen_loader.c ++++ b/tools/lib/bpf/gen_loader.c +@@ -687,27 +687,29 @@ static void emit_relo_kfunc_btf(struct b + return; + } + kdesc->off = btf_fd_idx; +- /* set a default value for imm */ ++ /* jump to success case */ ++ emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); ++ /* set value for imm, off as 0 */ + emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); +- /* skip success case store if ret < 0 */ +- emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 1)); ++ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); ++ /* skip success case for ret < 0 */ ++ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 10)); + /* store btf_id into insn[insn_idx].imm */ + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); ++ /* obtain fd in BPF_REG_9 */ ++ emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); ++ emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); ++ /* jump to fd_array store if fd denotes module BTF */ ++ emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); ++ /* set the default value for off */ ++ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); ++ /* skip BTF fd store for vmlinux BTF */ ++ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4)); + /* load fd_array slot pointer */ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); +- /* skip store of BTF fd if ret < 0 */ +- emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 3)); + /* store BTF fd in slot */ +- emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); +- emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); +- /* set a default value for off */ +- emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); +- /* skip insn->off store if ret < 0 */ +- emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 2)); +- /* skip if vmlinux BTF */ +- emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1)); + /* store index into insn[insn_idx].off */ + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx)); + log: +@@ -816,17 +818,20 @@ static void emit_relo_ksym_btf(struct bp + emit_bpf_find_by_name_kind(gen, relo); + if 
(!relo->is_weak) + emit_check_err(gen); +- /* set default values as 0 */ ++ /* jump to success case */ ++ emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); ++ /* set values for insn[insn_idx].imm, insn[insn_idx + 1].imm as 0 */ + emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); + emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 0)); +- /* skip success case stores if ret < 0 */ +- emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 4)); ++ /* skip success case for ret < 0 */ ++ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4)); + /* store btf_id into insn[insn_idx].imm */ + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); + /* store btf_obj_fd into insn[insn_idx + 1].imm */ + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); ++ /* skip src_reg adjustment */ + emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); + clear_src_reg: + /* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */ diff --git a/patches.suse/libbpf-Avoid-reading-past-ELF-data-section-end-when-.patch b/patches.suse/libbpf-Avoid-reading-past-ELF-data-section-end-when-.patch new file mode 100644 index 0000000..12961ed --- /dev/null +++ b/patches.suse/libbpf-Avoid-reading-past-ELF-data-section-end-when-.patch @@ -0,0 +1,39 @@ +From: Andrii Nakryiko +Date: Tue, 14 Dec 2021 15:20:54 -0800 +Subject: libbpf: Avoid reading past ELF data section end when copying license +Patch-mainline: v5.17-rc1 +Git-commit: f97982398cc1c92f2e9bd0ef1ef870a5a729b0ac +References: jsc#PED-1368 + +Fix possible read beyond ELF "license" data section if the license +string is not properly zero-terminated. Use the fact that libbpf_strlcpy +never accesses the (N-1)st byte of the source string because it's +replaced with '\0' anyways. + +If this happens, it's a violation of contract between libbpf and a user, +but not handling this more robustly upsets CIFuzz, so given the fix is +trivial, let's fix the potential issue. 
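Spelling the bound out (a restatement of the fix below, not new behavior): libbpf_strlcpy(dst, src, sz) reads at most src[0] through src[sz - 2], so passing sz = size + 1 keeps every read within the size-byte ELF section buffer even when the license string lacks a terminating NUL:

	/* data/size describe the ELF "license" section; reads never go
	 * past data[size - 1]
	 */
	libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));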
+ +Fixes: 9fc205b413b3 ("libbpf: Add sane strncpy alternative and use it internally") +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211214232054.3458774-1-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -1320,7 +1320,10 @@ static int bpf_object__check_endianness( + static int + bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) + { +- libbpf_strlcpy(obj->license, data, sizeof(obj->license)); ++ /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't ++ * go over allowed ELF data section buffer ++ */ ++ libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license))); + pr_debug("license of %s is %s\n", obj->path, obj->license); + return 0; + } diff --git a/patches.suse/libbpf-Avoid-reload-of-imm-for-weak-unresolved-repea.patch b/patches.suse/libbpf-Avoid-reload-of-imm-for-weak-unresolved-repea.patch new file mode 100644 index 0000000..2c682f2 --- /dev/null +++ b/patches.suse/libbpf-Avoid-reload-of-imm-for-weak-unresolved-repea.patch @@ -0,0 +1,36 @@ +From: Kumar Kartikeya Dwivedi +Date: Tue, 23 Nov 2021 05:27:33 +0530 +Subject: libbpf: Avoid reload of imm for weak, unresolved, repeating ksym +Patch-mainline: v5.17-rc1 +Git-commit: d995816b77eb826e0f6d7adf4471ec191b362be0 +References: jsc#PED-1368 + +Alexei pointed out that we can use BPF_REG_0 which already contains imm +from move_blob2blob computation. Note that we now compare the second +insn's imm, but this should not matter, since both will be zeroed out +for the error case for the insn populated earlier. + +Suggested-by: Alexei Starovoitov +Signed-off-by: Kumar Kartikeya Dwivedi +Signed-off-by: Andrii Nakryiko +Acked-by: Song Liu +Link: https://lore.kernel.org/bpf/20211122235733.634914-4-memxor@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/gen_loader.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/tools/lib/bpf/gen_loader.c ++++ b/tools/lib/bpf/gen_loader.c +@@ -808,9 +808,8 @@ static void emit_relo_ksym_btf(struct bp + kdesc->insn + offsetof(struct bpf_insn, imm)); + move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); +- emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_8, offsetof(struct bpf_insn, imm))); +- /* jump over src_reg adjustment if imm is not 0 */ +- emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 3)); ++ /* jump over src_reg adjustment if imm is not 0, reuse BPF_REG_0 from move_blob2blob */ ++ emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3)); + goto clear_src_reg; + } + /* remember insn offset, so we can copy BTF ID and FD later */ diff --git a/patches.suse/libbpf-Change-bpf_program__set_extra_flags-to-bpf_pr.patch b/patches.suse/libbpf-Change-bpf_program__set_extra_flags-to-bpf_pr.patch new file mode 100644 index 0000000..e84222b --- /dev/null +++ b/patches.suse/libbpf-Change-bpf_program__set_extra_flags-to-bpf_pr.patch @@ -0,0 +1,83 @@ +From: Florent Revest +Date: Fri, 19 Nov 2021 19:00:35 +0100 +Subject: libbpf: Change bpf_program__set_extra_flags to bpf_program__set_flags +Patch-mainline: v5.17-rc1 +Git-commit: 8cccee9e91e19207671b94af40bacf7c1d2e74ef +References: jsc#PED-1368 + +bpf_program__set_extra_flags has just been introduced so we can still +change it without breaking users. 
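The renamed setter assigns the complete flag set instead of OR-ing extra bits in; a read-modify-write sketch (the flag chosen is only for illustration):

	__u32 flags = bpf_program__flags(prog);

	/* drop one flag while preserving all others */
	bpf_program__set_flags(prog, flags & ~BPF_F_TEST_RND_HI32);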
+ +This new interface is a bit more flexible (for example if someone wants +to clear a flag). + +Signed-off-by: Florent Revest +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211119180035.1396139-1-revest@chromium.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 4 ++-- + tools/lib/bpf/libbpf.h | 2 +- + tools/lib/bpf/libbpf.map | 2 +- + tools/testing/selftests/bpf/testing_helpers.c | 4 +++- + 4 files changed, 7 insertions(+), 5 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -8313,12 +8313,12 @@ __u32 bpf_program__flags(const struct bp + return prog->prog_flags; + } + +-int bpf_program__set_extra_flags(struct bpf_program *prog, __u32 extra_flags) ++int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) + { + if (prog->obj->loaded) + return libbpf_err(-EBUSY); + +- prog->prog_flags |= extra_flags; ++ prog->prog_flags = flags; + return 0; + } + +--- a/tools/lib/bpf/libbpf.h ++++ b/tools/lib/bpf/libbpf.h +@@ -497,7 +497,7 @@ bpf_program__set_expected_attach_type(st + enum bpf_attach_type type); + + LIBBPF_API __u32 bpf_program__flags(const struct bpf_program *prog); +-LIBBPF_API int bpf_program__set_extra_flags(struct bpf_program *prog, __u32 extra_flags); ++LIBBPF_API int bpf_program__set_flags(struct bpf_program *prog, __u32 flags); + + LIBBPF_API int + bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, +--- a/tools/lib/bpf/libbpf.map ++++ b/tools/lib/bpf/libbpf.map +@@ -400,7 +400,7 @@ LIBBPF_0.6.0 { + bpf_program__flags; + bpf_program__insn_cnt; + bpf_program__insns; +- bpf_program__set_extra_flags; ++ bpf_program__set_flags; + btf__add_btf; + btf__add_decl_tag; + btf__add_type_tag; +--- a/tools/testing/selftests/bpf/testing_helpers.c ++++ b/tools/testing/selftests/bpf/testing_helpers.c +@@ -91,6 +91,7 @@ int bpf_prog_test_load(const char *file, + struct bpf_object_load_attr attr = {}; + struct bpf_object *obj; + struct bpf_program *prog; ++ __u32 flags; + int err; + + obj = bpf_object__open(file); +@@ -106,7 +107,8 @@ int bpf_prog_test_load(const char *file, + if (type != BPF_PROG_TYPE_UNSPEC) + bpf_program__set_type(prog, type); + +- bpf_program__set_extra_flags(prog, BPF_F_TEST_RND_HI32); ++ flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32; ++ bpf_program__set_flags(prog, flags); + + attr.obj = obj; + attr.log_level = extra_prog_load_log_flags; diff --git a/patches.suse/libbpf-Clean-gen_loader-s-attach-kind.patch b/patches.suse/libbpf-Clean-gen_loader-s-attach-kind.patch new file mode 100644 index 0000000..487a36b --- /dev/null +++ b/patches.suse/libbpf-Clean-gen_loader-s-attach-kind.patch @@ -0,0 +1,36 @@ +From: Alexei Starovoitov +Date: Wed, 1 Dec 2021 10:10:34 -0800 +Subject: libbpf: Clean gen_loader's attach kind. +Patch-mainline: v5.17-rc1 +Git-commit: 19250f5fc0c283892a61f3abf9d65e6325f63897 +References: jsc#PED-1368 + +The gen_loader has to clear attach_kind otherwise the programs +without attach_btf_id will fail load if they follow programs +with attach_btf_id. 
+ +Fixes: 67234743736a ("libbpf: Generate loader program out of BPF ELF file.") +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-12-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/gen_loader.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/tools/lib/bpf/gen_loader.c ++++ b/tools/lib/bpf/gen_loader.c +@@ -1018,9 +1018,11 @@ void bpf_gen__prog_load(struct bpf_gen * + debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt); + /* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */ + cleanup_relos(gen, insns_off); +- if (gen->attach_kind) ++ if (gen->attach_kind) { + emit_sys_close_blob(gen, + attr_field(prog_load_attr, attach_btf_obj_fd)); ++ gen->attach_kind = 0; ++ } + emit_check_err(gen); + /* remember prog_fd in the stack, if successful */ + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, diff --git a/patches.suse/libbpf-Cleanup-struct-bpf_core_cand.patch b/patches.suse/libbpf-Cleanup-struct-bpf_core_cand.patch new file mode 100644 index 0000000..d4dc712 --- /dev/null +++ b/patches.suse/libbpf-Cleanup-struct-bpf_core_cand.patch @@ -0,0 +1,106 @@ +From: Andrii Nakryiko +Date: Wed, 1 Dec 2021 10:10:30 -0800 +Subject: libbpf: Cleanup struct bpf_core_cand. +Patch-mainline: v5.17-rc1 +Git-commit: 03d5b99138dd8c7bfb838396acb180bd515ebf06 +References: jsc#PED-1368 + +Remove two redundant fields from struct bpf_core_cand. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-8-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 30 +++++++++++++++++------------- + tools/lib/bpf/relo_core.h | 2 -- + 2 files changed, 17 insertions(+), 15 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -5179,15 +5179,18 @@ static int bpf_core_add_cands(struct bpf + struct bpf_core_cand_list *cands) + { + struct bpf_core_cand *new_cands, *cand; +- const struct btf_type *t; +- const char *targ_name; ++ const struct btf_type *t, *local_t; ++ const char *targ_name, *local_name; + size_t targ_essent_len; + int n, i; + ++ local_t = btf__type_by_id(local_cand->btf, local_cand->id); ++ local_name = btf__str_by_offset(local_cand->btf, local_t->name_off); ++ + n = btf__type_cnt(targ_btf); + for (i = targ_start_id; i < n; i++) { + t = btf__type_by_id(targ_btf, i); +- if (btf_kind(t) != btf_kind(local_cand->t)) ++ if (btf_kind(t) != btf_kind(local_t)) + continue; + + targ_name = btf__name_by_offset(targ_btf, t->name_off); +@@ -5198,12 +5201,12 @@ static int bpf_core_add_cands(struct bpf + if (targ_essent_len != local_essent_len) + continue; + +- if (strncmp(local_cand->name, targ_name, local_essent_len) != 0) ++ if (strncmp(local_name, targ_name, local_essent_len) != 0) + continue; + + pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n", +- local_cand->id, btf_kind_str(local_cand->t), +- local_cand->name, i, btf_kind_str(t), targ_name, ++ local_cand->id, btf_kind_str(local_t), ++ local_name, i, btf_kind_str(t), targ_name, + targ_btf_name); + new_cands = libbpf_reallocarray(cands->cands, cands->len + 1, + sizeof(*cands->cands)); +@@ -5212,8 +5215,6 @@ static int bpf_core_add_cands(struct bpf + + cand = &new_cands[cands->len]; + cand->btf = targ_btf; +- cand->t = t; +- cand->name = targ_name; + cand->id = i; + + cands->cands = new_cands; 
+@@ -5320,18 +5321,21 @@ bpf_core_find_cands(struct bpf_object *o + struct bpf_core_cand local_cand = {}; + struct bpf_core_cand_list *cands; + const struct btf *main_btf; ++ const struct btf_type *local_t; ++ const char *local_name; + size_t local_essent_len; + int err, i; + + local_cand.btf = local_btf; +- local_cand.t = btf__type_by_id(local_btf, local_type_id); +- if (!local_cand.t) ++ local_cand.id = local_type_id; ++ local_t = btf__type_by_id(local_btf, local_type_id); ++ if (!local_t) + return ERR_PTR(-EINVAL); + +- local_cand.name = btf__name_by_offset(local_btf, local_cand.t->name_off); +- if (str_is_empty(local_cand.name)) ++ local_name = btf__name_by_offset(local_btf, local_t->name_off); ++ if (str_is_empty(local_name)) + return ERR_PTR(-EINVAL); +- local_essent_len = bpf_core_essential_name_len(local_cand.name); ++ local_essent_len = bpf_core_essential_name_len(local_name); + + cands = calloc(1, sizeof(*cands)); + if (!cands) +--- a/tools/lib/bpf/relo_core.h ++++ b/tools/lib/bpf/relo_core.h +@@ -8,8 +8,6 @@ + + struct bpf_core_cand { + const struct btf *btf; +- const struct btf_type *t; +- const char *name; + __u32 id; + }; + diff --git a/patches.suse/libbpf-Compile-using-std-gnu89.patch b/patches.suse/libbpf-Compile-using-std-gnu89.patch new file mode 100644 index 0000000..8956711 --- /dev/null +++ b/patches.suse/libbpf-Compile-using-std-gnu89.patch @@ -0,0 +1,29 @@ +From: Kumar Kartikeya Dwivedi +Date: Sat, 6 Nov 2021 05:12:40 +0530 +Subject: libbpf: Compile using -std=gnu89 +Patch-mainline: v5.17-rc1 +Git-commit: 3a74ac2d1159716f35c944639f71b33fa16084c8 +References: jsc#PED-1368 + +The minimum supported C standard version is C89, with use of GNU +extensions, hence make sure to catch any instances that would break +the build for this mode by passing -std=gnu89. + +Signed-off-by: Kumar Kartikeya Dwivedi +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211105234243.390179-4-memxor@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/Makefile | 1 + + 1 file changed, 1 insertion(+) + +--- a/tools/lib/bpf/Makefile ++++ b/tools/lib/bpf/Makefile +@@ -84,6 +84,7 @@ else + endif + + # Append required CFLAGS ++override CFLAGS += -std=gnu89 + override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum + override CFLAGS += -Werror -Wall + override CFLAGS += $(INCLUDES) diff --git a/patches.suse/libbpf-Deprecate-bpf_object__load_xattr.patch b/patches.suse/libbpf-Deprecate-bpf_object__load_xattr.patch new file mode 100644 index 0000000..7944b67 --- /dev/null +++ b/patches.suse/libbpf-Deprecate-bpf_object__load_xattr.patch @@ -0,0 +1,94 @@ +From: Andrii Nakryiko +Date: Thu, 9 Dec 2021 11:38:36 -0800 +Subject: libbpf: Deprecate bpf_object__load_xattr() +Patch-mainline: v5.17-rc1 +Git-commit: e7b924ca715f0d1c0be62b205c36c4076b335421 +References: jsc#PED-1368 + +Deprecate non-extensible bpf_object__load_xattr() in v0.8 ([0]). + +With log_level control through bpf_object_open_opts or +bpf_program__set_log_level(), we are finally at the point where +bpf_object__load_xattr() doesn't provide any functionality that can't be +accessed through other (better) ways. The other feature, +target_btf_path, is also controllable through bpf_object_open_opts. 
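As a migration sketch (paths, log level, and object file name are illustrative; the field mapping is the editor's reading of the patch): log_level moves to kernel_log_level and target_btf_path to btf_custom_path, both set at open time:

	LIBBPF_OPTS(bpf_object_open_opts, opts,
		.kernel_log_level = 1,				/* was bpf_object_load_attr.log_level */
		.btf_custom_path = "/path/to/vmlinux.btf",	/* was .target_btf_path */
	);

	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
	int err = bpf_object__load(obj);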
+ + [0] Closes: https://github.com/libbpf/libbpf/issues/289 + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211209193840.1248570-9-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 23 ++++++++++------------- + tools/lib/bpf/libbpf.h | 1 + + 2 files changed, 11 insertions(+), 13 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -7460,14 +7460,10 @@ static int bpf_object__resolve_externs(s + return 0; + } + +-int bpf_object__load_xattr(struct bpf_object_load_attr *attr) ++static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) + { +- struct bpf_object *obj; + int err, i; + +- if (!attr) +- return libbpf_err(-EINVAL); +- obj = attr->obj; + if (!obj) + return libbpf_err(-EINVAL); + +@@ -7477,7 +7473,7 @@ int bpf_object__load_xattr(struct bpf_ob + } + + if (obj->gen_loader) +- bpf_gen__init(obj->gen_loader, attr->log_level, obj->nr_programs, obj->nr_maps); ++ bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); + + err = bpf_object__probe_loading(obj); + err = err ? : bpf_object__load_vmlinux_btf(obj, false); +@@ -7486,8 +7482,8 @@ int bpf_object__load_xattr(struct bpf_ob + err = err ? : bpf_object__sanitize_maps(obj); + err = err ? : bpf_object__init_kern_struct_ops_maps(obj); + err = err ? : bpf_object__create_maps(obj); +- err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path); +- err = err ? : bpf_object__load_progs(obj, attr->log_level); ++ err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); ++ err = err ? : bpf_object__load_progs(obj, extra_log_level); + err = err ? : bpf_object_init_prog_arrays(obj); + + if (obj->gen_loader) { +@@ -7532,13 +7528,14 @@ out: + return libbpf_err(err); + } + +-int bpf_object__load(struct bpf_object *obj) ++int bpf_object__load_xattr(struct bpf_object_load_attr *attr) + { +- struct bpf_object_load_attr attr = { +- .obj = obj, +- }; ++ return bpf_object_load(attr->obj, attr->log_level, attr->target_btf_path); ++} + +- return bpf_object__load_xattr(&attr); ++int bpf_object__load(struct bpf_object *obj) ++{ ++ return bpf_object_load(obj, 0, NULL); + } + + static int make_parent_dir(const char *path) +--- a/tools/lib/bpf/libbpf.h ++++ b/tools/lib/bpf/libbpf.h +@@ -214,6 +214,7 @@ struct bpf_object_load_attr { + + /* Load/unload object into/from kernel */ + LIBBPF_API int bpf_object__load(struct bpf_object *obj); ++LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__load() instead") + LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); + LIBBPF_API int bpf_object__unload(struct bpf_object *obj); + diff --git a/patches.suse/libbpf-Deprecate-bpf_perf_event_read_simple-API.patch b/patches.suse/libbpf-Deprecate-bpf_perf_event_read_simple-API.patch new file mode 100644 index 0000000..c3387f1 --- /dev/null +++ b/patches.suse/libbpf-Deprecate-bpf_perf_event_read_simple-API.patch @@ -0,0 +1,82 @@ +From: Christy Lee +Date: Wed, 29 Dec 2021 12:41:56 -0800 +Subject: libbpf: Deprecate bpf_perf_event_read_simple() API +Patch-mainline: v5.17-rc1 +Git-commit: 7218c28c87f57c131879a75a226b9033ac90b266 +References: jsc#PED-1368 + +With perf_buffer__poll() and perf_buffer__consume() APIs available, +there is no reason to expose bpf_perf_event_read_simple() API to +users. If users need custom perf buffer, they could re-implement +the function. 
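For reference, the high-level replacement pattern looks roughly like this (map_fd and the callback body are placeholders):

	static void on_sample(void *ctx, int cpu, void *data, __u32 size)
	{
		/* process one sampled record */
	}

	struct perf_buffer *pb = perf_buffer__new(map_fd, 8 /* page_cnt */,
						  on_sample, NULL /* lost_cb */,
						  NULL /* ctx */, NULL /* opts */);

	while (perf_buffer__poll(pb, 100 /* timeout_ms */) >= 0)
		; /* on_sample() runs for every record consumed */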
+ +Mark bpf_perf_event_read_simple() deprecated and move the logic to a new +static function so it can still be called by other functions in the +same file. + + [0] Closes: https://github.com/libbpf/libbpf/issues/310 + +Signed-off-by: Christy Lee +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211229204156.13569-1-christylee@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 22 ++++++++++++++-------- + tools/lib/bpf/libbpf.h | 1 + + 2 files changed, 15 insertions(+), 8 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -10660,10 +10660,10 @@ struct bpf_link *bpf_map__attach_struct_ + return link; + } + +-enum bpf_perf_event_ret +-bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, +- void **copy_mem, size_t *copy_size, +- bpf_perf_event_print_t fn, void *private_data) ++static enum bpf_perf_event_ret ++perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, ++ void **copy_mem, size_t *copy_size, ++ bpf_perf_event_print_t fn, void *private_data) + { + struct perf_event_mmap_page *header = mmap_mem; + __u64 data_head = ring_buffer_read_head(header); +@@ -10708,6 +10708,12 @@ bpf_perf_event_read_simple(void *mmap_me + return libbpf_err(ret); + } + ++__attribute__((alias("perf_event_read_simple"))) ++enum bpf_perf_event_ret ++bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, ++ void **copy_mem, size_t *copy_size, ++ bpf_perf_event_print_t fn, void *private_data); ++ + struct perf_buffer; + + struct perf_buffer_params { +@@ -11116,10 +11122,10 @@ static int perf_buffer__process_records( + { + enum bpf_perf_event_ret ret; + +- ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size, +- pb->page_size, &cpu_buf->buf, +- &cpu_buf->buf_size, +- perf_buffer__process_record, cpu_buf); ++ ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size, ++ pb->page_size, &cpu_buf->buf, ++ &cpu_buf->buf_size, ++ perf_buffer__process_record, cpu_buf); + if (ret != LIBBPF_PERF_EVENT_CONT) + return ret; + return 0; +--- a/tools/lib/bpf/libbpf.h ++++ b/tools/lib/bpf/libbpf.h +@@ -976,6 +976,7 @@ LIBBPF_API int perf_buffer__buffer_fd(co + typedef enum bpf_perf_event_ret + (*bpf_perf_event_print_t)(struct perf_event_header *hdr, + void *private_data); ++LIBBPF_DEPRECATED_SINCE(0, 8, "use perf_buffer__poll() or perf_buffer__consume() instead") + LIBBPF_API enum bpf_perf_event_ret + bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data); diff --git a/patches.suse/libbpf-Deprecate-bpf_prog_load_xattr-API.patch b/patches.suse/libbpf-Deprecate-bpf_prog_load_xattr-API.patch new file mode 100644 index 0000000..cb6ed14 --- /dev/null +++ b/patches.suse/libbpf-Deprecate-bpf_prog_load_xattr-API.patch @@ -0,0 +1,51 @@ +From: Andrii Nakryiko +Date: Wed, 1 Dec 2021 15:28:24 -0800 +Subject: libbpf: Deprecate bpf_prog_load_xattr() API +Patch-mainline: v5.17-rc1 +Git-commit: c93faaaf2f67ba5396840316651cdc7640d9fa9e +References: jsc#PED-1368 + +bpf_prog_load_xattr() is high-level API that's named as a low-level +BPF_PROG_LOAD wrapper API, but it actually operates on struct +bpf_object. It's badly and confusingly misnamed as it will load all the +progs inside bpf_object, returning prog_fd of the very first BPF +program. It also has a bunch of ad-hoc things like log_level override, +map_ifindex auto-setting, etc. All this can be expressed more explicitly +and cleanly through existing libbpf APIs.
This patch marks +bpf_prog_load_xattr() for deprecation in libbpf v0.8 ([0]). + + [0] Closes: https://github.com/libbpf/libbpf/issues/308 + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211201232824.3166325-10-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.h | 1 + + tools/lib/bpf/libbpf_common.h | 5 +++++ + 2 files changed, 6 insertions(+) + +--- a/tools/lib/bpf/libbpf.h ++++ b/tools/lib/bpf/libbpf.h +@@ -632,6 +632,7 @@ struct bpf_prog_load_attr { + int prog_flags; + }; + ++LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open() and bpf_object__load() instead") + LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, + struct bpf_object **pobj, int *prog_fd); + LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open() and bpf_object__load() instead") +--- a/tools/lib/bpf/libbpf_common.h ++++ b/tools/lib/bpf/libbpf_common.h +@@ -40,6 +40,11 @@ + #else + #define __LIBBPF_MARK_DEPRECATED_0_7(X) + #endif ++#if __LIBBPF_CURRENT_VERSION_GEQ(0, 8) ++#define __LIBBPF_MARK_DEPRECATED_0_8(X) X ++#else ++#define __LIBBPF_MARK_DEPRECATED_0_8(X) ++#endif + + /* This set of internal macros allows to do "function overloading" based on + * number of arguments provided by used in backwards-compatible way during the diff --git a/patches.suse/libbpf-Deprecate-bpf_program__load-API.patch b/patches.suse/libbpf-Deprecate-bpf_program__load-API.patch new file mode 100644 index 0000000..ee2411c --- /dev/null +++ b/patches.suse/libbpf-Deprecate-bpf_program__load-API.patch @@ -0,0 +1,135 @@ +From: Andrii Nakryiko +Date: Tue, 2 Nov 2021 22:14:49 -0700 +Subject: libbpf: Deprecate bpf_program__load() API +Patch-mainline: v5.17-rc1 +Git-commit: be2f2d1680dfb36793ea8d3110edd4a1db496352 +References: jsc#PED-1368 + +Mark bpf_program__load() as deprecated ([0]) since v0.6. Also rename few +internal program loading bpf_object helper functions to have more +consistent naming. 
+ + [0] Closes: https://github.com/libbpf/libbpf/issues/301 + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211103051449.1884903-1-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 36 ++++++++++++++++++++++-------------- + tools/lib/bpf/libbpf.h | 4 ++-- + 2 files changed, 24 insertions(+), 16 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -6428,12 +6428,12 @@ static int libbpf_preload_prog(struct bp + return 0; + } + +-static int +-load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, +- char *license, __u32 kern_version, int *pfd) ++static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog, ++ struct bpf_insn *insns, int insns_cnt, ++ const char *license, __u32 kern_version, ++ int *prog_fd) + { + struct bpf_prog_load_params load_attr = {}; +- struct bpf_object *obj = prog->obj; + char *cp, errmsg[STRERR_BUFSIZE]; + size_t log_buf_size = 0; + char *log_buf = NULL; +@@ -6494,7 +6494,7 @@ load_program(struct bpf_program *prog, s + if (obj->gen_loader) { + bpf_gen__prog_load(obj->gen_loader, &load_attr, + prog - obj->programs); +- *pfd = -1; ++ *prog_fd = -1; + return 0; + } + retry_load: +@@ -6532,7 +6532,7 @@ retry_load: + } + } + +- *pfd = ret; ++ *prog_fd = ret; + ret = 0; + goto out; + } +@@ -6608,11 +6608,12 @@ static int bpf_program__record_externs(s + return 0; + } + +-int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) ++static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog, ++ const char *license, __u32 kern_ver) + { + int err = 0, fd, i; + +- if (prog->obj->loaded) { ++ if (obj->loaded) { + pr_warn("prog '%s': can't load after object was loaded\n", prog->name); + return libbpf_err(-EINVAL); + } +@@ -6638,10 +6639,11 @@ int bpf_program__load(struct bpf_program + pr_warn("prog '%s': inconsistent nr(%d) != 1\n", + prog->name, prog->instances.nr); + } +- if (prog->obj->gen_loader) ++ if (obj->gen_loader) + bpf_program__record_externs(prog); +- err = load_program(prog, prog->insns, prog->insns_cnt, +- license, kern_ver, &fd); ++ err = bpf_object_load_prog_instance(obj, prog, ++ prog->insns, prog->insns_cnt, ++ license, kern_ver, &fd); + if (!err) + prog->instances.fds[0] = fd; + goto out; +@@ -6669,8 +6671,9 @@ int bpf_program__load(struct bpf_program + continue; + } + +- err = load_program(prog, result.new_insn_ptr, +- result.new_insn_cnt, license, kern_ver, &fd); ++ err = bpf_object_load_prog_instance(obj, prog, ++ result.new_insn_ptr, result.new_insn_cnt, ++ license, kern_ver, &fd); + if (err) { + pr_warn("Loading the %dth instance of program '%s' failed\n", + i, prog->name); +@@ -6687,6 +6690,11 @@ out: + return libbpf_err(err); + } + ++int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_ver) ++{ ++ return bpf_object_load_prog(prog->obj, prog, license, kern_ver); ++} ++ + static int + bpf_object__load_progs(struct bpf_object *obj, int log_level) + { +@@ -6710,7 +6718,7 @@ bpf_object__load_progs(struct bpf_object + continue; + } + prog->log_level |= log_level; +- err = bpf_program__load(prog, obj->license, obj->kern_version); ++ err = bpf_object_load_prog(obj, prog, obj->license, obj->kern_version); + if (err) + return err; + } +--- a/tools/lib/bpf/libbpf.h ++++ b/tools/lib/bpf/libbpf.h +@@ -261,8 +261,8 @@ LIBBPF_API const struct bpf_insn *bpf_pr + */ + LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog); + 
+-LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, +- __u32 kern_version); ++LIBBPF_DEPRECATED_SINCE(0, 6, "use bpf_object__load() instead") ++LIBBPF_API int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_version); + LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); + LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") + LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, diff --git a/patches.suse/libbpf-Detect-corrupted-ELF-symbols-section.patch b/patches.suse/libbpf-Detect-corrupted-ELF-symbols-section.patch new file mode 100644 index 0000000..119223c --- /dev/null +++ b/patches.suse/libbpf-Detect-corrupted-ELF-symbols-section.patch @@ -0,0 +1,30 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 10:32:09 -0700 +Subject: libbpf: Detect corrupted ELF symbols section +Patch-mainline: v5.17-rc1 +Git-commit: 833907876be55205d0ec153dcd819c014404ee16 +References: jsc#PED-1368 + +Prevent divide-by-zero if ELF is corrupted and has zero sh_entsize. +Reported by oss-fuzz project. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20211103173213.1376990-2-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -3555,7 +3555,7 @@ static int bpf_object__collect_externs(s + + scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); + sh = elf_sec_hdr(obj, scn); +- if (!sh) ++ if (!sh || sh->sh_entsize != sizeof(Elf64_Sym)) + return -LIBBPF_ERRNO__FORMAT; + + dummy_var_btf_id = add_dummy_ksym_var(obj->btf); diff --git a/patches.suse/libbpf-Do-not-use-btf_dump__new-macro-in-C-mode.patch b/patches.suse/libbpf-Do-not-use-btf_dump__new-macro-in-C-mode.patch new file mode 100644 index 0000000..192981c --- /dev/null +++ b/patches.suse/libbpf-Do-not-use-btf_dump__new-macro-in-C-mode.patch @@ -0,0 +1,45 @@ +From: Jiri Olsa +Date: Thu, 23 Dec 2021 14:17:35 +0100 +Subject: libbpf: Do not use btf_dump__new() macro in C++ mode +Patch-mainline: v5.17-rc1 +Git-commit: 5652b807b7576f14c8b96e769470affef3287b7e +References: jsc#PED-1368 + +As reported in here [0], C++ compilers don't support +__builtin_types_compatible_p(), so at least don't screw up compilation +for them and let C++ users pick btf_dump__new vs +btf_dump__new_deprecated explicitly. + + [0] https://github.com/libbpf/libbpf/issues/283#issuecomment-986100727 + +Fixes: 6084f5dc928f ("libbpf: Ensure btf_dump__new() and btf_dump_opts are future-proof") +Signed-off-by: Jiri Olsa +Signed-off-by: Andrii Nakryiko +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20211223131736.483956-1-jolsa@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/btf.h | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/tools/lib/bpf/btf.h ++++ b/tools/lib/bpf/btf.h +@@ -313,12 +313,18 @@ LIBBPF_API struct btf_dump *btf_dump__ne + * + * The rest works just like in case of ___libbpf_override() usage with symbol + * versioning. ++ * ++ * C++ compilers don't support __builtin_types_compatible_p(), so at least ++ * don't screw up compilation for them and let C++ users pick btf_dump__new ++ * vs btf_dump__new_deprecated explicitly. 
+ */ ++#ifndef __cplusplus + #define btf_dump__new(a1, a2, a3, a4) __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(a4), btf_dump_printf_fn_t) || \ + __builtin_types_compatible_p(typeof(a4), void(void *, const char *, va_list)), \ + btf_dump__new_deprecated((void *)a1, (void *)a2, (void *)a3, (void *)a4), \ + btf_dump__new((void *)a1, (void *)a2, (void *)a3, (void *)a4)) ++#endif + + LIBBPF_API void btf_dump__free(struct btf_dump *d); + diff --git a/patches.suse/libbpf-Don-t-call-libc-APIs-with-NULL-pointers.patch b/patches.suse/libbpf-Don-t-call-libc-APIs-with-NULL-pointers.patch new file mode 100644 index 0000000..708e640 --- /dev/null +++ b/patches.suse/libbpf-Don-t-call-libc-APIs-with-NULL-pointers.patch @@ -0,0 +1,53 @@ +From: Andrii Nakryiko +Date: Tue, 23 Nov 2021 16:23:16 -0800 +Subject: libbpf: Don't call libc APIs with NULL pointers +Patch-mainline: v5.17-rc1 +Git-commit: 2a6a9bf26170b4e156c18706cd230934ebd2f95f +References: jsc#PED-1368 + +Sanitizer complains about qsort(), bsearch(), and memcpy() being called +with NULL pointer. This can only happen when the associated number of +elements is zero, so no harm should be done. But still prevent this from +happening to keep sanitizer runs clean from extra noise. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211124002325.1737739-5-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -3369,7 +3369,8 @@ static int bpf_object__elf_collect(struc + + /* sort BPF programs by section name and in-section instruction offset + * for faster search */ +- qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); ++ if (obj->nr_programs) ++ qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); + + return bpf_object__init_btf(obj, btf_data, btf_ext_data); + } +@@ -5823,6 +5824,8 @@ static int cmp_relo_by_insn_idx(const vo + + static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx) + { ++ if (!prog->nr_reloc) ++ return NULL; + return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc, + sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx); + } +@@ -5838,8 +5841,9 @@ static int append_subprog_relos(struct b + relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos)); + if (!relos) + return -ENOMEM; +- memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, +- sizeof(*relos) * subprog->nr_reloc); ++ if (subprog->nr_reloc) ++ memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, ++ sizeof(*relos) * subprog->nr_reloc); + + for (i = main_prog->nr_reloc; i < new_cnt; i++) + relos[i].insn_idx += subprog->sub_insn_off; diff --git a/patches.suse/libbpf-Don-t-validate-TYPE_ID-relo-s-original-imm-va.patch b/patches.suse/libbpf-Don-t-validate-TYPE_ID-relo-s-original-imm-va.patch new file mode 100644 index 0000000..7c172b1 --- /dev/null +++ b/patches.suse/libbpf-Don-t-validate-TYPE_ID-relo-s-original-imm-va.patch @@ -0,0 +1,90 @@ +From: Andrii Nakryiko +Date: Sun, 12 Dec 2021 17:07:06 -0800 +Subject: libbpf: Don't validate TYPE_ID relo's original imm value +Patch-mainline: v5.17-rc1 +Git-commit: 4b443bc1785f28df56fdbd6a107dc68ef7d5aa8e +References: jsc#PED-1368 + +During linking, type IDs in the resulting linked BPF object file can +change, and so ldimm64 instructions corresponding to +BPF_CORE_TYPE_ID_TARGET and BPF_CORE_TYPE_ID_LOCAL CO-RE relos 
can get +their imm value out of sync with actual CO-RE relocation information +that's updated by BPF linker properly during linking process. + +We could teach BPF linker to adjust such instructions, but it feels +a bit too much for linker to re-implement good chunk of +bpf_core_patch_insns logic just for this. This is a redundant safety +check for TYPE_ID relocations, as the real validation is in matching +CO-RE specs, so if that works fine, it's very unlikely that there is +something wrong with the instruction itself. + +So, instead, teach libbpf (and kernel) to ignore insn->imm for +BPF_CORE_TYPE_ID_TARGET and BPF_CORE_TYPE_ID_LOCAL relos. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211213010706.100231-1-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/relo_core.c | 19 ++++++++++++++----- + 1 file changed, 14 insertions(+), 5 deletions(-) + +--- a/tools/lib/bpf/relo_core.c ++++ b/tools/lib/bpf/relo_core.c +@@ -709,10 +709,14 @@ static int bpf_core_calc_field_relo(cons + + static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, + const struct bpf_core_spec *spec, +- __u32 *val) ++ __u32 *val, bool *validate) + { + __s64 sz; + ++ /* by default, always check expected value in bpf_insn */ ++ if (validate) ++ *validate = true; ++ + /* type-based relos return zero when target type is not found */ + if (!spec) { + *val = 0; +@@ -722,6 +726,11 @@ static int bpf_core_calc_type_relo(const + switch (relo->kind) { + case BPF_CORE_TYPE_ID_TARGET: + *val = spec->root_type_id; ++ /* type ID, embedded in bpf_insn, might change during linking, ++ * so enforcing it is pointless ++ */ ++ if (validate) ++ *validate = false; + break; + case BPF_CORE_TYPE_EXISTS: + *val = 1; +@@ -861,8 +870,8 @@ static int bpf_core_calc_relo(const char + res->fail_memsz_adjust = true; + } + } else if (core_relo_is_type_based(relo->kind)) { +- err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); +- err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); ++ err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val, &res->validate); ++ err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val, NULL); + } else if (core_relo_is_enumval_based(relo->kind)) { + err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); + err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); +@@ -1213,7 +1222,8 @@ int bpf_core_apply_relo_insn(const char + + /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ + if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { +- targ_res.validate = true; ++ /* bpf_insn's imm value could get out of sync during linking */ ++ targ_res.validate = false; + targ_res.poison = false; + targ_res.orig_val = local_spec->root_type_id; + targ_res.new_val = local_spec->root_type_id; +@@ -1227,7 +1237,6 @@ int bpf_core_apply_relo_insn(const char + return -EOPNOTSUPP; + } + +- + for (i = 0, j = 0; i < cands->len; i++) { + err = bpf_core_spec_match(local_spec, cands->cands[i].btf, + cands->cands[i].id, cand_spec); diff --git a/patches.suse/libbpf-Ensure-btf_dump__new-and-btf_dump_opts-are-fu.patch b/patches.suse/libbpf-Ensure-btf_dump__new-and-btf_dump_opts-are-fu.patch new file mode 100644 index 0000000..b47eafc --- /dev/null +++ b/patches.suse/libbpf-Ensure-btf_dump__new-and-btf_dump_opts-are-fu.patch @@ -0,0 +1,192 @@ +From: Andrii Nakryiko +Date: Wed, 10 Nov 2021 21:36:19 -0800 +Subject: libbpf: Ensure btf_dump__new() and btf_dump_opts are future-proof 
+Patch-mainline: v5.17-rc1 +Git-commit: 6084f5dc928f2ada4331ba9eda65542e94d86bc6 +References: jsc#PED-1368 + +Change btf_dump__new() and corresponding struct btf_dump_ops structure +to be extensible by using OPTS "framework" ([0]). Given we don't change +the names, we use a similar approach as with bpf_prog_load(), but this +time we ended up with two APIs with the same name and same number of +arguments, so overloading based on number of arguments with +___libbpf_override() doesn't work. + +Instead, use "overloading" based on types. In this particular case, +print callback has to be specified, so we detect which argument is +a callback. If it's 4th (last) argument, old implementation of API is +used by user code. If not, it must be 2nd, and thus new implementation +is selected. The rest is handled by the same symbol versioning approach. + +btf_ext argument is dropped as it was never used and isn't necessary +either. If in the future we'll need btf_ext, that will be added into +OPTS-based struct btf_dump_opts. + +struct btf_dump_opts is reused for both old API and new APIs. ctx field +is marked deprecated in v0.7+ and it's put at the same memory location +as OPTS's sz field. Any user of new-style btf_dump__new() will have to +set sz field and doesn't/shouldn't use ctx, as ctx is now passed along +the callback as mandatory input argument, following the other APIs in +libbpf that accept callbacks consistently. + +Again, this is quite ugly in implementation, but is done in the name of +backwards compatibility and uniform and extensible future APIs (at the +same time, sigh). And it will be gone in libbpf 1.0. + + [0] Closes: https://github.com/libbpf/libbpf/issues/283 + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211111053624.190580-5-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/btf.h | 51 +++++++++++++++++++++++++++++++++++++++++++---- + tools/lib/bpf/btf_dump.c | 31 ++++++++++++++++++++-------- + tools/lib/bpf/libbpf.map | 2 + + 3 files changed, 71 insertions(+), 13 deletions(-) + +--- a/tools/lib/bpf/btf.h ++++ b/tools/lib/bpf/btf.h +@@ -267,15 +267,58 @@ LIBBPF_API int btf__dedup_deprecated(str + struct btf_dump; + + struct btf_dump_opts { +- void *ctx; ++ union { ++ size_t sz; ++ void *ctx; /* DEPRECATED: will be gone in v1.0 */ ++ }; + }; + + typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args); + + LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf, +- const struct btf_ext *btf_ext, +- const struct btf_dump_opts *opts, +- btf_dump_printf_fn_t printf_fn); ++ btf_dump_printf_fn_t printf_fn, ++ void *ctx, ++ const struct btf_dump_opts *opts); ++ ++LIBBPF_API struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf, ++ btf_dump_printf_fn_t printf_fn, ++ void *ctx, ++ const struct btf_dump_opts *opts); ++ ++LIBBPF_API struct btf_dump *btf_dump__new_deprecated(const struct btf *btf, ++ const struct btf_ext *btf_ext, ++ const struct btf_dump_opts *opts, ++ btf_dump_printf_fn_t printf_fn); ++ ++/* Choose either btf_dump__new() or btf_dump__new_deprecated() based on the ++ * type of 4th argument. If it's btf_dump's print callback, use deprecated ++ * API; otherwise, choose the new btf_dump__new(). ___libbpf_override() ++ * doesn't work here because both variants have 4 input arguments. 
++ * ++ * (void *) casts are necessary to avoid compilation warnings about type ++ * mismatches, because even though __builtin_choose_expr() only ever evaluates ++ * one side the other side still has to satisfy type constraints (this is ++ * compiler implementation limitation which might be lifted eventually, ++ * according to the documentation). So passing struct btf_ext in place of ++ * btf_dump_printf_fn_t would be generating compilation warning. Casting to ++ * void * avoids this issue. ++ * ++ * Also, two type compatibility checks for a function and function pointer are ++ * required because passing function reference into btf_dump__new() as ++ * btf_dump__new(..., my_callback, ...) and as btf_dump__new(..., ++ * &my_callback, ...) (not explicit ampersand in the latter case) actually ++ * differs as far as __builtin_types_compatible_p() is concerned. Thus two ++ * checks are combined to detect callback argument. ++ * ++ * The rest works just like in case of ___libbpf_override() usage with symbol ++ * versioning. ++ */ ++#define btf_dump__new(a1, a2, a3, a4) __builtin_choose_expr( \ ++ __builtin_types_compatible_p(typeof(a4), btf_dump_printf_fn_t) || \ ++ __builtin_types_compatible_p(typeof(a4), void(void *, const char *, va_list)), \ ++ btf_dump__new_deprecated((void *)a1, (void *)a2, (void *)a3, (void *)a4), \ ++ btf_dump__new((void *)a1, (void *)a2, (void *)a3, (void *)a4)) ++ + LIBBPF_API void btf_dump__free(struct btf_dump *d); + + LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); +--- a/tools/lib/bpf/btf_dump.c ++++ b/tools/lib/bpf/btf_dump.c +@@ -77,9 +77,8 @@ struct btf_dump_data { + + struct btf_dump { + const struct btf *btf; +- const struct btf_ext *btf_ext; + btf_dump_printf_fn_t printf_fn; +- struct btf_dump_opts opts; ++ void *cb_ctx; + int ptr_sz; + bool strip_mods; + bool skip_anon_defs; +@@ -138,29 +137,32 @@ static void btf_dump_printf(const struct + va_list args; + + va_start(args, fmt); +- d->printf_fn(d->opts.ctx, fmt, args); ++ d->printf_fn(d->cb_ctx, fmt, args); + va_end(args); + } + + static int btf_dump_mark_referenced(struct btf_dump *d); + static int btf_dump_resize(struct btf_dump *d); + +-struct btf_dump *btf_dump__new(const struct btf *btf, +- const struct btf_ext *btf_ext, +- const struct btf_dump_opts *opts, +- btf_dump_printf_fn_t printf_fn) ++DEFAULT_VERSION(btf_dump__new_v0_6_0, btf_dump__new, LIBBPF_0.6.0) ++struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf, ++ btf_dump_printf_fn_t printf_fn, ++ void *ctx, ++ const struct btf_dump_opts *opts) + { + struct btf_dump *d; + int err; + ++ if (!printf_fn) ++ return libbpf_err_ptr(-EINVAL); ++ + d = calloc(1, sizeof(struct btf_dump)); + if (!d) + return libbpf_err_ptr(-ENOMEM); + + d->btf = btf; +- d->btf_ext = btf_ext; + d->printf_fn = printf_fn; +- d->opts.ctx = opts ? opts->ctx : NULL; ++ d->cb_ctx = ctx; + d->ptr_sz = btf__pointer_size(btf) ? : sizeof(void *); + + d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); +@@ -186,6 +188,17 @@ err: + return libbpf_err_ptr(err); + } + ++COMPAT_VERSION(btf_dump__new_deprecated, btf_dump__new, LIBBPF_0.0.4) ++struct btf_dump *btf_dump__new_deprecated(const struct btf *btf, ++ const struct btf_ext *btf_ext, ++ const struct btf_dump_opts *opts, ++ btf_dump_printf_fn_t printf_fn) ++{ ++ if (!printf_fn) ++ return libbpf_err_ptr(-EINVAL); ++ return btf_dump__new_v0_6_0(btf, printf_fn, opts ? 
opts->ctx : NULL, opts); ++} ++ + static int btf_dump_resize(struct btf_dump *d) + { + int err, last_id = btf__type_cnt(d->btf) - 1; +--- a/tools/lib/bpf/libbpf.map ++++ b/tools/lib/bpf/libbpf.map +@@ -407,4 +407,6 @@ LIBBPF_0.6.0 { + btf__dedup_deprecated; + btf__raw_data; + btf__type_cnt; ++ btf_dump__new; ++ btf_dump__new_deprecated; + } LIBBPF_0.5.0; diff --git a/patches.suse/libbpf-Fix-a-couple-of-missed-btf_type_tag-handling-.patch b/patches.suse/libbpf-Fix-a-couple-of-missed-btf_type_tag-handling-.patch new file mode 100644 index 0000000..374669d --- /dev/null +++ b/patches.suse/libbpf-Fix-a-couple-of-missed-btf_type_tag-handling-.patch @@ -0,0 +1,55 @@ +From: Yonghong Song +Date: Mon, 15 Nov 2021 08:39:37 -0800 +Subject: libbpf: Fix a couple of missed btf_type_tag handling in btf.c +Patch-mainline: v5.17-rc1 +Git-commit: 69a055d546156adc6f7727ec981f721d5ba9231a +References: jsc#PED-1368 + +Commit 2dc1e488e5cd ("libbpf: Support BTF_KIND_TYPE_TAG") added the +BTF_KIND_TYPE_TAG support. But to test vmlinux build with ... + + #define __user __attribute__((btf_type_tag("user"))) + +... I needed to sync libbpf repo and manually copy libbpf sources to +pahole. To simplify process, I used BTF_KIND_RESTRICT to simulate the +BTF_KIND_TYPE_TAG with vmlinux build as "restrict" modifier is barely +used in kernel. + +But this approach missed one case in dedup with structures where +BTF_KIND_RESTRICT is handled and BTF_KIND_TYPE_TAG is not handled in +btf_dedup_is_equiv(), and this will result in a pahole dedup failure. +This patch fixed this issue and a selftest is added in the subsequent +patch to test this scenario. + +The other missed handling is in btf__resolve_size(). Currently the compiler +always emit like PTR->TYPE_TAG->... so in practice we don't hit the missing +BTF_KIND_TYPE_TAG handling issue with compiler generated code. But lets +add case BTF_KIND_TYPE_TAG in the switch statement to be future proof. + +Fixes: 2dc1e488e5cd ("libbpf: Support BTF_KIND_TYPE_TAG") +Signed-off-by: Yonghong Song +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211115163937.3922235-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/btf.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/tools/lib/bpf/btf.c ++++ b/tools/lib/bpf/btf.c +@@ -610,6 +610,7 @@ __s64 btf__resolve_size(const struct btf + case BTF_KIND_RESTRICT: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: ++ case BTF_KIND_TYPE_TAG: + type_id = t->type; + break; + case BTF_KIND_ARRAY: +@@ -4023,6 +4024,7 @@ static int btf_dedup_is_equiv(struct btf + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: ++ case BTF_KIND_TYPE_TAG: + if (cand_type->info != canon_type->info) + return 0; + return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); diff --git a/patches.suse/libbpf-Fix-bpf_prog_load-log_buf-logic-for-log_level.patch b/patches.suse/libbpf-Fix-bpf_prog_load-log_buf-logic-for-log_level.patch new file mode 100644 index 0000000..bc4fdb3 --- /dev/null +++ b/patches.suse/libbpf-Fix-bpf_prog_load-log_buf-logic-for-log_level.patch @@ -0,0 +1,83 @@ +From: Andrii Nakryiko +Date: Thu, 9 Dec 2021 11:38:29 -0800 +Subject: libbpf: Fix bpf_prog_load() log_buf logic for log_level 0 +Patch-mainline: v5.17-rc1 +Git-commit: 4cf23a3c6359556a1cca489cf2b901e2b904c4b0 +References: jsc#PED-1368 + +To unify libbpf APIs behavior w.r.t. 
 log_buf and log_level, fix +bpf_prog_load() to follow the same logic as bpf_btf_load() and +high-level bpf_object__load() API will follow in the subsequent patches: + - if log_level is 0 and non-NULL log_buf is provided by a user, attempt + load operation initially with no log_buf and log_level set; + - if successful, we are done, return new FD; + - on error, retry the load operation with log_level bumped to 1 and + log_buf set; this way verbose logging will be requested only when we + are sure that there is a failure, but will be fast in the + common/expected success case. + +Of course, user can still specify log_level > 0 from the very beginning +to force log collection. + +Suggested-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211209193840.1248570-2-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf.c | 29 ++++++++++++++++------------- + 1 file changed, 16 insertions(+), 13 deletions(-) + +--- a/tools/lib/bpf/bpf.c ++++ b/tools/lib/bpf/bpf.c +@@ -303,10 +303,6 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_t + if (log_level && !log_buf) + return libbpf_err(-EINVAL); + +- attr.log_level = log_level; +- attr.log_buf = ptr_to_u64(log_buf); +- attr.log_size = log_size; +- + func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0); + func_info = OPTS_GET(opts, func_info, NULL); + attr.func_info_rec_size = func_info_rec_size; +@@ -321,6 +317,12 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_t + + attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL)); + ++ if (log_level) { ++ attr.log_buf = ptr_to_u64(log_buf); ++ attr.log_size = log_size; ++ attr.log_level = log_level; ++ } ++ + fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + if (fd >= 0) + return fd; +@@ -366,16 +368,17 @@ int bpf_prog_load_v0_6_0(enum bpf_prog_t + goto done; + } + +- if (log_level || !log_buf) +- goto done; +- +- /* Try again with log */ +- log_buf[0] = 0; +- attr.log_buf = ptr_to_u64(log_buf); +- attr.log_size = log_size; +- attr.log_level = 1; ++ if (log_level == 0 && log_buf) { ++ /* log_level == 0 with non-NULL log_buf requires retrying on error ++ * with log_level == 1 and log_buf/log_buf_size set, to get details of ++ * failure ++ */ ++ attr.log_buf = ptr_to_u64(log_buf); ++ attr.log_size = log_size; ++ attr.log_level = 1; + +- fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); ++ fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); ++ } + done: + /* free() doesn't affect errno, so we don't need to restore it */ + free(finfo); diff --git a/patches.suse/libbpf-Fix-gen_loader-assumption-on-number-of-progra.patch b/patches.suse/libbpf-Fix-gen_loader-assumption-on-number-of-progra.patch new file mode 100644 index 0000000..ba09590 --- /dev/null +++ b/patches.suse/libbpf-Fix-gen_loader-assumption-on-number-of-progra.patch @@ -0,0 +1,35 @@ +From: Alexei Starovoitov +Date: Sat, 11 Dec 2021 17:16:19 -0800 +Subject: libbpf: Fix gen_loader assumption on number of programs. +Patch-mainline: v5.17-rc1 +Git-commit: 259172bb6514758ce3be1610c500b51a9f44212a +References: jsc#PED-1368 + +libbpf's obj->nr_programs includes static and global functions. That number +could be higher than the actual number of bpf programs going to be loaded by +gen_loader. Passing larger nr_programs to bpf_gen__init() doesn't hurt. Those +extra stack slots will stay as zero. bpf_gen__finish() needs to check that +actual number of progs that gen_loader saw is less than or equal to +obj->nr_programs.
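To make the over-counting concrete, consider a hypothetical BPF object like the sketch below (BPF-side C, names invented): libbpf counts both functions in obj->nr_programs, while gen_loader emits a single program load, so the old strict equality check would trip:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    static __noinline int helper(int x)   /* subprog: counted in obj->nr_programs */
    {
        return x + 1;
    }

    SEC("xdp")
    int prog(struct xdp_md *ctx)          /* the only program gen_loader loads */
    {
        return helper(0) ? XDP_PASS : XDP_DROP;
    }

    char LICENSE[] SEC("license") = "GPL";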
+ + +Fixes: ba05fd36b851 ("libbpf: Perform map fd cleanup for gen_loader in case of error") +Signed-off-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/gen_loader.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/tools/lib/bpf/gen_loader.c ++++ b/tools/lib/bpf/gen_loader.c +@@ -371,8 +371,9 @@ int bpf_gen__finish(struct bpf_gen *gen, + { + int i; + +- if (nr_progs != gen->nr_progs || nr_maps != gen->nr_maps) { +- pr_warn("progs/maps mismatch\n"); ++ if (nr_progs < gen->nr_progs || nr_maps != gen->nr_maps) { ++ pr_warn("nr_progs %d/%d nr_maps %d/%d mismatch\n", ++ nr_progs, gen->nr_progs, nr_maps, gen->nr_maps); + gen->error = -EFAULT; + return gen->error; + } diff --git a/patches.suse/libbpf-Fix-glob_syms-memory-leak-in-bpf_linker.patch b/patches.suse/libbpf-Fix-glob_syms-memory-leak-in-bpf_linker.patch new file mode 100644 index 0000000..2b5f090 --- /dev/null +++ b/patches.suse/libbpf-Fix-glob_syms-memory-leak-in-bpf_linker.patch @@ -0,0 +1,28 @@ +From: Andrii Nakryiko +Date: Tue, 23 Nov 2021 16:23:17 -0800 +Subject: libbpf: Fix glob_syms memory leak in bpf_linker +Patch-mainline: v5.17-rc1 +Git-commit: 8cb125566c40b7141d8842c534f0ea5820ee3d5c +References: jsc#PED-1368 + +glob_syms array wasn't freed on bpf_linker__free(). Fix that. + +Fixes: a46349227cd8 ("libbpf: Add linker extern resolution support for functions and global variables") +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211124002325.1737739-6-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/linker.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/tools/lib/bpf/linker.c ++++ b/tools/lib/bpf/linker.c +@@ -210,6 +210,7 @@ void bpf_linker__free(struct bpf_linker + } + free(linker->secs); + ++ free(linker->glob_syms); + free(linker); + } + diff --git a/patches.suse/libbpf-Fix-non-C89-loop-variable-declaration-in-gen_.patch b/patches.suse/libbpf-Fix-non-C89-loop-variable-declaration-in-gen_.patch new file mode 100644 index 0000000..0b290a8 --- /dev/null +++ b/patches.suse/libbpf-Fix-non-C89-loop-variable-declaration-in-gen_.patch @@ -0,0 +1,35 @@ +From: Andrii Nakryiko +Date: Fri, 5 Nov 2021 12:10:55 -0700 +Subject: libbpf: Fix non-C89 loop variable declaration in gen_loader.c +Patch-mainline: v5.17-rc1 +Git-commit: b8b5cb55f5d3f03cc1479a3768d68173a10359ad +References: jsc#PED-1368 + +Fix the `int i` declaration inside the for statement. This is non-C89 +compliant. See [0] for user report breaking BCC build.
+ + [0] https://github.com/libbpf/libbpf/issues/403 + +Fixes: 18f4fccbf314 ("libbpf: Update gen_loader to emit BTF_KIND_FUNC relocations") +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Kumar Kartikeya Dwivedi +Link: https://lore.kernel.org/bpf/20211105191055.3324874-1-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/gen_loader.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/tools/lib/bpf/gen_loader.c ++++ b/tools/lib/bpf/gen_loader.c +@@ -597,8 +597,9 @@ void bpf_gen__record_extern(struct bpf_g + static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo) + { + struct ksym_desc *kdesc; ++ int i; + +- for (int i = 0; i < gen->nr_ksyms; i++) { ++ for (i = 0; i < gen->nr_ksyms; i++) { + if (!strcmp(gen->ksyms[i].name, relo->name)) { + gen->ksyms[i].ref++; + return &gen->ksyms[i]; diff --git a/patches.suse/libbpf-Fix-potential-misaligned-memory-access-in-btf.patch b/patches.suse/libbpf-Fix-potential-misaligned-memory-access-in-btf.patch new file mode 100644 index 0000000..a55b968 --- /dev/null +++ b/patches.suse/libbpf-Fix-potential-misaligned-memory-access-in-btf.patch @@ -0,0 +1,67 @@ +From: Andrii Nakryiko +Date: Tue, 23 Nov 2021 16:23:14 -0800 +Subject: libbpf: Fix potential misaligned memory access in btf_ext__new() +Patch-mainline: v5.17-rc1 +Git-commit: 401891a9debaf0a684502f2aaecf53448cee9414 +References: jsc#PED-1368 + +Perform a memory copy before we do the sanity checks of btf_ext_hdr. +This prevents misaligned memory access if raw btf_ext data is not 4-byte +aligned ([0]). + +While at it, also add missing const qualifier. + + [0] Closes: https://github.com/libbpf/libbpf/issues/391 + +Fixes: 2993e0515bb4 ("tools/bpf: add support to read .BTF.ext sections") +Reported-by: Evgeny Vereshchagin +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211124002325.1737739-3-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/btf.c | 10 +++++----- + tools/lib/bpf/btf.h | 2 +- + 2 files changed, 6 insertions(+), 6 deletions(-) + +--- a/tools/lib/bpf/btf.c ++++ b/tools/lib/bpf/btf.c +@@ -2731,15 +2731,11 @@ void btf_ext__free(struct btf_ext *btf_e + free(btf_ext); + } + +-struct btf_ext *btf_ext__new(__u8 *data, __u32 size) ++struct btf_ext *btf_ext__new(const __u8 *data, __u32 size) + { + struct btf_ext *btf_ext; + int err; + +- err = btf_ext_parse_hdr(data, size); +- if (err) +- return libbpf_err_ptr(err); +- + btf_ext = calloc(1, sizeof(struct btf_ext)); + if (!btf_ext) + return libbpf_err_ptr(-ENOMEM); +@@ -2752,6 +2748,10 @@ struct btf_ext *btf_ext__new(__u8 *data, + } + memcpy(btf_ext->data, data, size); + ++ err = btf_ext_parse_hdr(btf_ext->data, size); ++ if (err) ++ goto done; ++ + if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) { + err = -EINVAL; + goto done; +--- a/tools/lib/bpf/btf.h ++++ b/tools/lib/bpf/btf.h +@@ -157,7 +157,7 @@ LIBBPF_API int btf__get_map_kv_tids(cons + __u32 expected_value_size, + __u32 *key_type_id, __u32 *value_type_id); + +-LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size); ++LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size); + LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); + LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, + __u32 *size); diff --git a/patches.suse/libbpf-Fix-potential-uninit-memory-read.patch b/patches.suse/libbpf-Fix-potential-uninit-memory-read.patch new file mode 100644 index 
0000000..c469d74 --- /dev/null +++ b/patches.suse/libbpf-Fix-potential-uninit-memory-read.patch @@ -0,0 +1,33 @@ +From: Andrii Nakryiko +Date: Mon, 13 Dec 2021 17:00:32 -0800 +Subject: libbpf: Fix potential uninit memory read +Patch-mainline: v5.17-rc1 +Git-commit: 4581e676d3be9d8c921a48bf18e92c5a8f31bf13 +References: jsc#PED-1368 + +In case of BPF_CORE_TYPE_ID_LOCAL we fill out target result explicitly. +But targ_res itself isn't initialized in such a case, and subsequent +call to bpf_core_patch_insn() might read uninitialized field (like +fail_memsz_adjust in this case). So ensure that targ_res is +zero-initialized for BPF_CORE_TYPE_ID_LOCAL case. + +This was reported by Coverity static analyzer. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211214010032.3843804-1-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/relo_core.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/tools/lib/bpf/relo_core.c ++++ b/tools/lib/bpf/relo_core.c +@@ -1223,6 +1223,7 @@ int bpf_core_apply_relo_insn(const char + /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ + if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { + /* bpf_insn's imm value could get out of sync during linking */ ++ memset(&targ_res, 0, sizeof(targ_res)); + targ_res.validate = false; + targ_res.poison = false; + targ_res.orig_val = local_spec->root_type_id; diff --git a/patches.suse/libbpf-Fix-section-counting-logic.patch b/patches.suse/libbpf-Fix-section-counting-logic.patch new file mode 100644 index 0000000..246fa7a --- /dev/null +++ b/patches.suse/libbpf-Fix-section-counting-logic.patch @@ -0,0 +1,42 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 10:32:12 -0700 +Subject: libbpf: Fix section counting logic +Patch-mainline: v5.17-rc1 +Git-commit: 0d6988e16a12ebd41d3e268992211b0ceba44ed7 +References: jsc#PED-1368 + +e_shnum does include section #0 and as such is exactly the number of ELF +sections that we need to allocate memory for to use section indices as +array indices. Fix the off-by-one error. + +This is purely accounting fix, previously we were overallocating one +too many array items. But no correctness errors otherwise. + +Fixes: 25bbbd7a444b ("libbpf: Remove assumptions about uniqueness of .rodata/.data/.bss maps") +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20211103173213.1376990-5-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -3190,11 +3190,11 @@ static int bpf_object__elf_collect(struc + Elf_Scn *scn; + Elf64_Shdr *sh; + +- /* ELF section indices are 1-based, so allocate +1 element to keep +- * indexing simple. Also include 0th invalid section into sec_cnt for +- * simpler and more traditional iteration logic. ++ /* ELF section indices are 0-based, but sec #0 is special "invalid" ++ * section. e_shnum does include sec #0, so e_shnum is the necessary ++ * size of an array to keep all the sections. 
+ */ +- obj->efile.sec_cnt = 1 + obj->efile.ehdr->e_shnum; ++ obj->efile.sec_cnt = obj->efile.ehdr->e_shnum; + obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); + if (!obj->efile.secs) + return -ENOMEM; diff --git a/patches.suse/libbpf-Fix-trivial-typo.patch b/patches.suse/libbpf-Fix-trivial-typo.patch new file mode 100644 index 0000000..7d91714 --- /dev/null +++ b/patches.suse/libbpf-Fix-trivial-typo.patch @@ -0,0 +1,34 @@ +From: huangxuesen +Date: Mon, 6 Dec 2021 09:47:16 +0800 +Subject: libbpf: Fix trivial typo +Patch-mainline: v5.17-rc1 +Git-commit: 222c98c7979084fbefb4ce2ae377210c6e42011e +References: jsc#PED-1368 + +Fix typo in comment from 'bpf_skeleton_map' to 'bpf_map_skeleton' +and from 'bpf_skeleton_prog' to 'bpf_prog_skeleton'. + +Signed-off-by: huangxuesen +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/1638755236-3851199-1-git-send-email-hxseverything@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/tools/lib/bpf/libbpf.h ++++ b/tools/lib/bpf/libbpf.h +@@ -985,11 +985,11 @@ struct bpf_object_skeleton { + struct bpf_object **obj; + + int map_cnt; +- int map_skel_sz; /* sizeof(struct bpf_skeleton_map) */ ++ int map_skel_sz; /* sizeof(struct bpf_map_skeleton) */ + struct bpf_map_skeleton *maps; + + int prog_cnt; +- int prog_skel_sz; /* sizeof(struct bpf_skeleton_prog) */ ++ int prog_skel_sz; /* sizeof(struct bpf_prog_skeleton) */ + struct bpf_prog_skeleton *progs; + }; + diff --git a/patches.suse/libbpf-Fix-typo-in-btf__dedup-LIBBPF_0.0.2-definitio.patch b/patches.suse/libbpf-Fix-typo-in-btf__dedup-LIBBPF_0.0.2-definitio.patch new file mode 100644 index 0000000..5757508 --- /dev/null +++ b/patches.suse/libbpf-Fix-typo-in-btf__dedup-LIBBPF_0.0.2-definitio.patch @@ -0,0 +1,33 @@ +From: Vincent Minet +Date: Fri, 10 Dec 2021 07:31:12 +0100 +Subject: libbpf: Fix typo in btf__dedup@LIBBPF_0.0.2 definition +Patch-mainline: v5.17-rc1 +Git-commit: b69c5c07a66ee569b8ccdc0cb567fe0622c89ea5 +References: jsc#PED-1368 + +The btf__dedup_deprecated name was misspelled in the definition of the +compat symbol for btf__dedup. This leads it to be missing from the +shared library. + +This fixes it. 
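The reason a one-letter misspelling drops a symbol instead of breaking the build: COMPAT_VERSION()/DEFAULT_VERSION() merely emit assembler .symver directives, roughly as in this simplified sketch of libbpf's internal macros:

    #define COMPAT_VERSION(internal_name, api_name, version) \
            asm(".symver " #internal_name "," #api_name "@" #version);
    #define DEFAULT_VERSION(internal_name, api_name, version) \
            asm(".symver " #internal_name "," #api_name "@@" #version);

so aliasing the misspelled, non-existent bpf__dedup_deprecated binds nothing, and the LIBBPF_0.0.2-versioned compat symbol for btf__dedup silently never appears in the shared library.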
+ +Fixes: 957d350a8b94 ("libbpf: Turn btf_dedup_opts into OPTS-based struct") +Signed-off-by: Vincent Minet +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211210063112.80047-1-vincent@vincent-minet.net +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/btf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/lib/bpf/btf.c ++++ b/tools/lib/bpf/btf.c +@@ -3107,7 +3107,7 @@ done: + return libbpf_err(err); + } + +-COMPAT_VERSION(bpf__dedup_deprecated, btf__dedup, LIBBPF_0.0.2) ++COMPAT_VERSION(btf__dedup_deprecated, btf__dedup, LIBBPF_0.0.2) + int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *unused_opts) + { + LIBBPF_OPTS(btf_dedup_opts, opts, .btf_ext = btf_ext); diff --git a/patches.suse/libbpf-Fix-using-invalidated-memory-in-bpf_linker.patch b/patches.suse/libbpf-Fix-using-invalidated-memory-in-bpf_linker.patch new file mode 100644 index 0000000..ed17777 --- /dev/null +++ b/patches.suse/libbpf-Fix-using-invalidated-memory-in-bpf_linker.patch @@ -0,0 +1,41 @@ +From: Andrii Nakryiko +Date: Tue, 23 Nov 2021 16:23:18 -0800 +Subject: libbpf: Fix using invalidated memory in bpf_linker +Patch-mainline: v5.17-rc1 +Git-commit: 593835377f24ca1bb98008ec1dc3baefe491ad6e +References: jsc#PED-1368 + +add_dst_sec() can invalidate bpf_linker's section index making +dst_symtab pointer pointing into unallocated memory. Reinitialize +dst_symtab pointer on each iteration to make sure it's always valid. + +Fixes: faf6ed321cf6 ("libbpf: Add BPF static linker APIs") +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211124002325.1737739-7-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/linker.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/tools/lib/bpf/linker.c ++++ b/tools/lib/bpf/linker.c +@@ -2000,7 +2000,7 @@ add_sym: + static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj) + { + struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx]; +- struct dst_sec *dst_symtab = &linker->secs[linker->symtab_sec_idx]; ++ struct dst_sec *dst_symtab; + int i, err; + + for (i = 1; i < obj->sec_cnt; i++) { +@@ -2033,6 +2033,9 @@ static int linker_append_elf_relos(struc + return -1; + } + ++ /* add_dst_sec() above could have invalidated linker->secs */ ++ dst_symtab = &linker->secs[linker->symtab_sec_idx]; ++ + /* shdr->sh_link points to SYMTAB */ + dst_sec->shdr->sh_link = linker->symtab_sec_idx; + diff --git a/patches.suse/libbpf-Improve-ELF-relo-sanitization.patch b/patches.suse/libbpf-Improve-ELF-relo-sanitization.patch new file mode 100644 index 0000000..e4ce479 --- /dev/null +++ b/patches.suse/libbpf-Improve-ELF-relo-sanitization.patch @@ -0,0 +1,79 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 10:32:13 -0700 +Subject: libbpf: Improve ELF relo sanitization +Patch-mainline: v5.17-rc1 +Git-commit: b7332d2820d394dd2ac127df1567b4da597355a1 +References: jsc#PED-1368 + +Add few sanity checks for relocations to prevent div-by-zero and +out-of-bounds array accesses in libbpf. 
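For context, the records being sanitized are raw SHT_REL entries, whose layout (from <elf.h>) is:

    typedef struct {
            Elf64_Addr  r_offset;  /* where to apply: checked for BPF insn
                                    * alignment and section bounds */
            Elf64_Xword r_info;    /* ELF64_R_SYM(r_info): symbol index that
                                    * must reference valid tables/sections */
    } Elf64_Rel;

The sh_entsize check against sizeof(Elf64_Rel) also guards the sh_size / sh_entsize division used to count entries, which is where the div-by-zero mentioned above would otherwise come from.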
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20211103173213.1376990-6-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 24 +++++++++++++++++++----- + 1 file changed, 19 insertions(+), 5 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -3306,6 +3306,10 @@ static int bpf_object__elf_collect(struc + } else if (sh->sh_type == SHT_REL) { + int targ_sec_idx = sh->sh_info; /* points to other section */ + ++ if (sh->sh_entsize != sizeof(Elf64_Rel) || ++ targ_sec_idx >= obj->efile.sec_cnt) ++ return -LIBBPF_ERRNO__FORMAT; ++ + /* Only do relo for section with exec instructions */ + if (!section_have_execinstr(obj, targ_sec_idx) && + strcmp(name, ".rel" STRUCT_OPS_SEC) && +@@ -4025,7 +4029,7 @@ static int + bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) + { + const char *relo_sec_name, *sec_name; +- size_t sec_idx = shdr->sh_info; ++ size_t sec_idx = shdr->sh_info, sym_idx; + struct bpf_program *prog; + struct reloc_desc *relos; + int err, i, nrels; +@@ -4036,6 +4040,9 @@ bpf_object__collect_prog_relos(struct bp + Elf64_Sym *sym; + Elf64_Rel *rel; + ++ if (sec_idx >= obj->efile.sec_cnt) ++ return -EINVAL; ++ + scn = elf_sec_by_idx(obj, sec_idx); + scn_data = elf_sec_data(obj, scn); + +@@ -4055,16 +4062,23 @@ bpf_object__collect_prog_relos(struct bp + return -LIBBPF_ERRNO__FORMAT; + } + +- sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); ++ sym_idx = ELF64_R_SYM(rel->r_info); ++ sym = elf_sym_by_idx(obj, sym_idx); + if (!sym) { +- pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n", +- relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i); ++ pr_warn("sec '%s': symbol #%zu not found for relo #%d\n", ++ relo_sec_name, sym_idx, i); ++ return -LIBBPF_ERRNO__FORMAT; ++ } ++ ++ if (sym->st_shndx >= obj->efile.sec_cnt) { ++ pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n", ++ relo_sec_name, sym_idx, (size_t)sym->st_shndx, i); + return -LIBBPF_ERRNO__FORMAT; + } + + if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) { + pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", +- relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i); ++ relo_sec_name, (size_t)rel->r_offset, i); + return -LIBBPF_ERRNO__FORMAT; + } + diff --git a/patches.suse/libbpf-Improve-LINUX_VERSION_CODE-detection.patch b/patches.suse/libbpf-Improve-LINUX_VERSION_CODE-detection.patch new file mode 100644 index 0000000..30c6b21 --- /dev/null +++ b/patches.suse/libbpf-Improve-LINUX_VERSION_CODE-detection.patch @@ -0,0 +1,107 @@ +From: Andrii Nakryiko +Date: Wed, 22 Dec 2021 15:10:03 -0800 +Subject: libbpf: Improve LINUX_VERSION_CODE detection +Patch-mainline: v5.17-rc1 +Git-commit: 5b3d72987701d51bf31823b39db49d10970f5c2d +References: jsc#PED-1368 + +Ubuntu reports incorrect kernel version through uname(), which on older +kernels leads to kprobe BPF programs failing to load due to the version +check mismatch. + +Accommodate Ubuntu's quirks with LINUX_VERSION_CODE by using +Ubuntu-specific /proc/version_signature to fetch major/minor/patch versions +to form LINUX_VERSION_CODE. + +While at it, consolidate libbpf's kernel version detection code between +libbpf.c and libbpf_probes.c.
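As a worked example of the difference this makes on Ubuntu, using the signature line quoted in the patch below and the usual KERNEL_VERSION() packing of major/minor/patch into one integer:

    /* "Ubuntu 5.4.0-12.15-generic 5.4.8" */
    KERNEL_VERSION(5, 4, 8) == (5 << 16) + (4 << 8) + 8 == 328712  /* matches the kernel */
    KERNEL_VERSION(5, 4, 0) == (5 << 16) + (4 << 8) + 0 == 328704  /* what uname() yields */

so kprobe programs no longer carry a kern_version value the running kernel rejects.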
+ + [0] Closes: https://github.com/libbpf/libbpf/issues/421 + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20211222231003.2334940-1-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 27 ++++++++++++++++++++++++++- + tools/lib/bpf/libbpf_internal.h | 2 ++ + tools/lib/bpf/libbpf_probes.c | 16 ---------------- + 3 files changed, 28 insertions(+), 17 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -795,11 +795,36 @@ bpf_object__add_programs(struct bpf_obje + return 0; + } + +-static __u32 get_kernel_version(void) ++__u32 get_kernel_version(void) + { ++ /* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release, ++ * but Ubuntu provides /proc/version_signature file, as described at ++ * https://ubuntu.com/kernel, with an example contents below, which we ++ * can use to get a proper LINUX_VERSION_CODE. ++ * ++ * Ubuntu 5.4.0-12.15-generic 5.4.8 ++ * ++ * In the above, 5.4.8 is what kernel is actually expecting, while ++ * uname() call will return 5.4.0 in info.release. ++ */ ++ const char *ubuntu_kver_file = "/proc/version_signature"; + __u32 major, minor, patch; + struct utsname info; + ++ if (access(ubuntu_kver_file, R_OK) == 0) { ++ FILE *f; ++ ++ f = fopen(ubuntu_kver_file, "r"); ++ if (f) { ++ if (fscanf(f, "%*s %*s %d.%d.%d\n", &major, &minor, &patch) == 3) { ++ fclose(f); ++ return KERNEL_VERSION(major, minor, patch); ++ } ++ fclose(f); ++ } ++ /* something went wrong, fall back to uname() approach */ ++ } ++ + uname(&info); + if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3) + return 0; +--- a/tools/lib/bpf/libbpf_internal.h ++++ b/tools/lib/bpf/libbpf_internal.h +@@ -188,6 +188,8 @@ static inline void libbpf_strlcpy(char * + dst[i] = '\0'; + } + ++__u32 get_kernel_version(void); ++ + struct btf; + struct btf_type; + +--- a/tools/lib/bpf/libbpf_probes.c ++++ b/tools/lib/bpf/libbpf_probes.c +@@ -48,22 +48,6 @@ static int get_vendor_id(int ifindex) + return strtol(buf, NULL, 0); + } + +-static int get_kernel_version(void) +-{ +- int version, subversion, patchlevel; +- struct utsname utsn; +- +- /* Return 0 on failure, and attempt to probe with empty kversion */ +- if (uname(&utsn)) +- return 0; +- +- if (sscanf(utsn.release, "%d.%d.%d", +- &version, &subversion, &patchlevel) != 3) +- return 0; +- +- return (version << 16) + (subversion << 8) + patchlevel; +-} +- + static int probe_prog_load(enum bpf_prog_type prog_type, + const struct bpf_insn *insns, size_t insns_cnt, + char *log_buf, size_t log_buf_sz, diff --git a/patches.suse/libbpf-Improve-logging-around-BPF-program-loading.patch b/patches.suse/libbpf-Improve-logging-around-BPF-program-loading.patch new file mode 100644 index 0000000..9e0ec93 --- /dev/null +++ b/patches.suse/libbpf-Improve-logging-around-BPF-program-loading.patch @@ -0,0 +1,174 @@ +From: Andrii Nakryiko +Date: Thu, 9 Dec 2021 11:38:33 -0800 +Subject: libbpf: Improve logging around BPF program loading +Patch-mainline: v5.17-rc1 +Git-commit: ad9a7f96445b70c415d8e193f854321b110c890a +References: jsc#PED-1368 + +Add missing "prog '%s': " prefixes in few places and use consistently +markers for beginning and end of program load logs. 
 Here's an example of log output: + +libbpf: prog 'handler': BPF program load failed: Permission denied +libbpf: -- BEGIN PROG LOAD LOG --- +arg#0 reference type('UNKNOWN ') size cannot be determined: -22 +; out1 = in1; +0: (18) r1 = 0xffffc9000cdcc000 +2: (61) r1 = *(u32 *)(r1 +0) + +... + +81: (63) *(u32 *)(r4 +0) = r5 + R1_w=map_value(id=0,off=16,ks=4,vs=20,imm=0) R4=map_value(id=0,off=400,ks=4,vs=16,imm=0) +invalid access to map value, value_size=16 off=400 size=4 +R4 min value is outside of the allowed memory range +processed 63 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0 + -- END PROG LOAD LOG -- +libbpf: failed to load program 'handler' +libbpf: failed to load object 'test_skeleton' + +The entire verifier log, including BEGIN and END markers, is now always +output during a single print callback call. This should make it much +easier to post-process or parse it, if necessary. It's not an explicit +API guarantee, but it can be reasonably expected to stay like that. + +Also __bpf_object__open is renamed to bpf_object_open() as it's always +an adventure to find the exact function that implements bpf_object's +open phase, so drop the double underscores and use internal libbpf +naming convention. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211209193840.1248570-6-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 38 ++++++++++---------- + tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 6 ++- + 2 files changed, 23 insertions(+), 21 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -6662,8 +6662,10 @@ retry_load: + ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr); + + if (ret >= 0) { +- if (log_buf && load_attr.log_level) +- pr_debug("verifier log:\n%s", log_buf); ++ if (log_buf && load_attr.log_level) { ++ pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", ++ prog->name, log_buf); ++ } + + if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) { + struct bpf_map *map; +@@ -6676,8 +6678,8 @@ retry_load: + + if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); +- pr_warn("prog '%s': failed to bind .rodata map: %s\n", +- prog->name, cp); ++ pr_warn("prog '%s': failed to bind map '%s': %s\n", ++ prog->name, map->real_name, cp); + /* Don't fail hard if can't bind rodata. */ + } + } +@@ -6691,23 +6693,22 @@ retry_load: + if (!log_buf || errno == ENOSPC) { + log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, + log_buf_size << 1); +- + free(log_buf); + goto retry_load; + } + ret = errno ?
-errno : -LIBBPF_ERRNO__LOAD; + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); +- pr_warn("load bpf program failed: %s\n", cp); ++ pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); + pr_perm_msg(ret); + + if (log_buf && log_buf[0] != '\0') { + ret = -LIBBPF_ERRNO__VERIFY; +- pr_warn("-- BEGIN DUMP LOG ---\n"); +- pr_warn("\n%s\n", log_buf); +- pr_warn("-- END LOG --\n"); +- } else if (insns_cnt >= BPF_MAXINSNS) { +- pr_warn("Program too large (%d insns), at most %d insns\n", +- insns_cnt, BPF_MAXINSNS); ++ pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", ++ prog->name, log_buf); ++ } ++ if (insns_cnt >= BPF_MAXINSNS) { ++ pr_warn("prog '%s': program too large (%d insns), at most %d insns\n", ++ prog->name, insns_cnt, BPF_MAXINSNS); + ret = -LIBBPF_ERRNO__PROG2BIG; + } else if (prog->type != BPF_PROG_TYPE_KPROBE) { + /* Wrong program type? */ +@@ -6931,9 +6932,8 @@ static int bpf_object_init_progs(struct + return 0; + } + +-static struct bpf_object * +-__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, +- const struct bpf_object_open_opts *opts) ++static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, ++ const struct bpf_object_open_opts *opts) + { + const char *obj_name, *kconfig, *btf_tmp_path; + struct bpf_object *obj; +@@ -7033,7 +7033,7 @@ __bpf_object__open_xattr(struct bpf_obje + return NULL; + + pr_debug("loading %s\n", attr->file); +- return __bpf_object__open(attr->file, NULL, 0, &opts); ++ return bpf_object_open(attr->file, NULL, 0, &opts); + } + + struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) +@@ -7059,7 +7059,7 @@ bpf_object__open_file(const char *path, + + pr_debug("loading %s\n", path); + +- return libbpf_ptr(__bpf_object__open(path, NULL, 0, opts)); ++ return libbpf_ptr(bpf_object_open(path, NULL, 0, opts)); + } + + struct bpf_object * +@@ -7069,7 +7069,7 @@ bpf_object__open_mem(const void *obj_buf + if (!obj_buf || obj_buf_sz == 0) + return libbpf_err_ptr(-EINVAL); + +- return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, opts)); ++ return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts)); + } + + struct bpf_object * +@@ -7086,7 +7086,7 @@ bpf_object__open_buffer(const void *obj_ + if (!obj_buf || obj_buf_sz == 0) + return errno = EINVAL, NULL; + +- return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts)); ++ return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, &opts)); + } + + int bpf_object__unload(struct bpf_object *obj) +--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c ++++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +@@ -217,14 +217,16 @@ static bool found; + static int libbpf_debug_print(enum libbpf_print_level level, + const char *format, va_list args) + { +- char *log_buf; ++ const char *log_buf; + + if (level != LIBBPF_WARN || +- strcmp(format, "libbpf: \n%s\n")) { ++ !strstr(format, "-- BEGIN PROG LOAD LOG --")) { + vprintf(format, args); + return 0; + } + ++ /* skip prog_name */ ++ va_arg(args, char *); + log_buf = va_arg(args, char *); + if (!log_buf) + goto out; diff --git a/patches.suse/libbpf-Improve-sanity-checking-during-BTF-fix-up.patch b/patches.suse/libbpf-Improve-sanity-checking-during-BTF-fix-up.patch new file mode 100644 index 0000000..b29b9ac --- /dev/null +++ b/patches.suse/libbpf-Improve-sanity-checking-during-BTF-fix-up.patch @@ -0,0 +1,38 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 10:32:10 -0700 +Subject: libbpf: Improve 
sanity checking during BTF fix up +Patch-mainline: v5.17-rc1 +Git-commit: 88918dc12dc357a06d8d722a684617b1c87a4654 +References: jsc#PED-1368 + +If BTF is corrupted DATASEC's variable type ID might be incorrect. +Prevent this easy to detect situation with extra NULL check. +Reported by oss-fuzz project. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20211103173213.1376990-3-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -2752,13 +2752,12 @@ static int btf_fixup_datasec(struct bpf_ + + for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { + t_var = btf__type_by_id(btf, vsi->type); +- var = btf_var(t_var); +- +- if (!btf_is_var(t_var)) { ++ if (!t_var || !btf_is_var(t_var)) { + pr_debug("Non-VAR type seen in section %s\n", name); + return -EINVAL; + } + ++ var = btf_var(t_var); + if (var->linkage == BTF_VAR_STATIC) + continue; + diff --git a/patches.suse/libbpf-Load-global-data-maps-lazily-on-legacy-kernel.patch b/patches.suse/libbpf-Load-global-data-maps-lazily-on-legacy-kernel.patch new file mode 100644 index 0000000..36b53f6 --- /dev/null +++ b/patches.suse/libbpf-Load-global-data-maps-lazily-on-legacy-kernel.patch @@ -0,0 +1,102 @@ +From: Andrii Nakryiko +Date: Tue, 23 Nov 2021 12:01:04 -0800 +Subject: libbpf: Load global data maps lazily on legacy kernels +Patch-mainline: v5.17-rc1 +Git-commit: 16e0c35c6f7a2e90d52f3035ecf942af21417b7b +References: jsc#PED-1368 + +Load global data maps lazily, if kernel is too old to support global +data. Make sure that programs are still correct by detecting if any of +the to-be-loaded programs have relocation against any of such maps. + +This allows to solve the issue ([0]) with bpf_printk() and Clang +generating unnecessary and unreferenced .rodata.strX.Y sections, but it +also goes further along the CO-RE lines, allowing to have a BPF object +in which some code can work on very old kernels and relies only on BPF +maps explicitly, while other BPF programs might enjoy global variable +support. If such programs are correctly set to not load at runtime on +old kernels, bpf_object will load and function correctly now. + + [0] https://lore.kernel.org/bpf/CAK-59YFPU3qO+_pXWOH+c1LSA=8WA1yabJZfREjOEXNHAqgXNg@mail.gmail.com/ + +Fixes: aed659170a31 ("libbpf: Support multiple .rodata.* and .data.* BPF maps") +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Acked-by: Song Liu +Link: https://lore.kernel.org/bpf/20211123200105.387855-1-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 34 ++++++++++++++++++++++++++++++---- + 1 file changed, 30 insertions(+), 4 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -5006,6 +5006,24 @@ bpf_object__create_maps(struct bpf_objec + for (i = 0; i < obj->nr_maps; i++) { + map = &obj->maps[i]; + ++ /* To support old kernels, we skip creating global data maps ++ * (.rodata, .data, .kconfig, etc); later on, during program ++ * loading, if we detect that at least one of the to-be-loaded ++ * programs is referencing any global data map, we'll error ++ * out with program name and relocation index logged. 
++ * This approach allows to accommodate Clang emitting ++ * unnecessary .rodata.str1.1 sections for string literals, ++ * but also it allows to have CO-RE applications that use ++ * global variables in some of BPF programs, but not others. ++ * If those global variable-using programs are not loaded at ++ * runtime due to bpf_program__set_autoload(prog, false), ++ * bpf_object loading will succeed just fine even on old ++ * kernels. ++ */ ++ if (bpf_map__is_internal(map) && ++ !kernel_supports(obj, FEAT_GLOBAL_DATA)) ++ continue; ++ + retried = false; + retry: + if (map->pin_path) { +@@ -5605,6 +5623,14 @@ bpf_object__relocate_data(struct bpf_obj + insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; + insn[0].imm = relo->map_idx; + } else { ++ const struct bpf_map *map = &obj->maps[relo->map_idx]; ++ ++ if (bpf_map__is_internal(map) && ++ !kernel_supports(obj, FEAT_GLOBAL_DATA)) { ++ pr_warn("prog '%s': relo #%d: kernel doesn't support global data\n", ++ prog->name, i); ++ return -ENOTSUP; ++ } + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[0].imm = obj->maps[relo->map_idx].fd; + } +@@ -6139,6 +6165,8 @@ bpf_object__relocate(struct bpf_object * + */ + if (prog_is_subprog(obj, prog)) + continue; ++ if (!prog->load) ++ continue; + + err = bpf_object__relocate_calls(obj, prog); + if (err) { +@@ -6152,6 +6180,8 @@ bpf_object__relocate(struct bpf_object * + prog = &obj->programs[i]; + if (prog_is_subprog(obj, prog)) + continue; ++ if (!prog->load) ++ continue; + err = bpf_object__relocate_data(obj, prog); + if (err) { + pr_warn("prog '%s': failed to relocate data references: %d\n", +@@ -6937,10 +6967,6 @@ static int bpf_object__sanitize_maps(str + bpf_object__for_each_map(m, obj) { + if (!bpf_map__is_internal(m)) + continue; +- if (!kernel_supports(obj, FEAT_GLOBAL_DATA)) { +- pr_warn("kernel doesn't support global data\n"); +- return -ENOTSUP; +- } + if (!kernel_supports(obj, FEAT_ARRAY_MMAP)) + m->def.map_flags ^= BPF_F_MMAPABLE; + } diff --git a/patches.suse/libbpf-Make-perf_buffer__new-use-OPTS-based-interfac.patch b/patches.suse/libbpf-Make-perf_buffer__new-use-OPTS-based-interfac.patch new file mode 100644 index 0000000..7a7156d --- /dev/null +++ b/patches.suse/libbpf-Make-perf_buffer__new-use-OPTS-based-interfac.patch @@ -0,0 +1,261 @@ +From: Andrii Nakryiko +Date: Wed, 10 Nov 2021 21:36:20 -0800 +Subject: libbpf: Make perf_buffer__new() use OPTS-based interface +Patch-mainline: v5.17-rc1 +Git-commit: 4178893465774f91dcd49465ae6f4e3cc036b7b2 +References: jsc#PED-1368 + +Add new variants of perf_buffer__new() and perf_buffer__new_raw() that +use OPTS-based options for future extensibility ([0]). Given all the +currently used API names are best fits, re-use them and use +___libbpf_override() approach and symbol versioning to preserve ABI and +source code compatibility. struct perf_buffer_opts and struct +perf_buffer_raw_opts are kept as well, but they are restructured such +that they are OPTS-based when used with new APIs. For struct +perf_buffer_raw_opts we keep few fields intact, so we have to also +preserve the memory location of them both when used as OPTS and for +legacy API variants. This is achieved with anonymous padding for OPTS +"incarnation" of the struct. These pads can be eventually used for new +options. 
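From the caller's side, the new-style variants read as below; this is a sketch, assuming map_fd refers to a BPF_PERF_EVENT_ARRAY map and using placeholder callbacks matching perf_buffer_sample_fn/perf_buffer_lost_fn:

    static void handle_sample(void *ctx, int cpu, void *data, __u32 size)
    {
            /* consume one record of `size` bytes */
    }

    static void handle_lost(void *ctx, int cpu, __u64 cnt)
    {
            /* `cnt` records were dropped on this CPU */
    }

    struct perf_buffer *pb;

    pb = perf_buffer__new(map_fd, 8 /* pages per CPU ring */,
                          handle_sample, handle_lost, NULL /* ctx */,
                          NULL /* opts: extensible, sz-based */);
    /* check with libbpf_get_error(pb) before use */
    while (perf_buffer__poll(pb, 100 /* timeout, ms */) >= 0)
            ;   /* callbacks fire from inside poll */

Existing callers keep working unchanged through the LIBBPF_0.0.4 compat symbols set up in the diff.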
+ + [0] Closes: https://github.com/libbpf/libbpf/issues/311 + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211111053624.190580-6-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 70 ++++++++++++++++++++++++++++++-------- + tools/lib/bpf/libbpf.h | 86 +++++++++++++++++++++++++++++++++++++++-------- + tools/lib/bpf/libbpf.map | 4 ++ + 3 files changed, 132 insertions(+), 28 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -10562,11 +10562,18 @@ error: + static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, + struct perf_buffer_params *p); + +-struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, +- const struct perf_buffer_opts *opts) ++DEFAULT_VERSION(perf_buffer__new_v0_6_0, perf_buffer__new, LIBBPF_0.6.0) ++struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, ++ perf_buffer_sample_fn sample_cb, ++ perf_buffer_lost_fn lost_cb, ++ void *ctx, ++ const struct perf_buffer_opts *opts) + { + struct perf_buffer_params p = {}; +- struct perf_event_attr attr = { 0, }; ++ struct perf_event_attr attr = {}; ++ ++ if (!OPTS_VALID(opts, perf_buffer_opts)) ++ return libbpf_err_ptr(-EINVAL); + + attr.config = PERF_COUNT_SW_BPF_OUTPUT; + attr.type = PERF_TYPE_SOFTWARE; +@@ -10575,29 +10582,62 @@ struct perf_buffer *perf_buffer__new(int + attr.wakeup_events = 1; + + p.attr = &attr; +- p.sample_cb = opts ? opts->sample_cb : NULL; +- p.lost_cb = opts ? opts->lost_cb : NULL; +- p.ctx = opts ? opts->ctx : NULL; ++ p.sample_cb = sample_cb; ++ p.lost_cb = lost_cb; ++ p.ctx = ctx; + + return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); + } + +-struct perf_buffer * +-perf_buffer__new_raw(int map_fd, size_t page_cnt, +- const struct perf_buffer_raw_opts *opts) ++COMPAT_VERSION(perf_buffer__new_deprecated, perf_buffer__new, LIBBPF_0.0.4) ++struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt, ++ const struct perf_buffer_opts *opts) ++{ ++ return perf_buffer__new_v0_6_0(map_fd, page_cnt, ++ opts ? opts->sample_cb : NULL, ++ opts ? opts->lost_cb : NULL, ++ opts ? 
opts->ctx : NULL, ++ NULL); ++} ++ ++DEFAULT_VERSION(perf_buffer__new_raw_v0_6_0, perf_buffer__new_raw, LIBBPF_0.6.0) ++struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, ++ struct perf_event_attr *attr, ++ perf_buffer_event_fn event_cb, void *ctx, ++ const struct perf_buffer_raw_opts *opts) + { + struct perf_buffer_params p = {}; + +- p.attr = opts->attr; +- p.event_cb = opts->event_cb; +- p.ctx = opts->ctx; +- p.cpu_cnt = opts->cpu_cnt; +- p.cpus = opts->cpus; +- p.map_keys = opts->map_keys; ++ if (page_cnt == 0 || !attr) ++ return libbpf_err_ptr(-EINVAL); ++ ++ if (!OPTS_VALID(opts, perf_buffer_raw_opts)) ++ return libbpf_err_ptr(-EINVAL); ++ ++ p.attr = attr; ++ p.event_cb = event_cb; ++ p.ctx = ctx; ++ p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0); ++ p.cpus = OPTS_GET(opts, cpus, NULL); ++ p.map_keys = OPTS_GET(opts, map_keys, NULL); + + return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); + } + ++COMPAT_VERSION(perf_buffer__new_raw_deprecated, perf_buffer__new_raw, LIBBPF_0.0.4) ++struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt, ++ const struct perf_buffer_raw_opts *opts) ++{ ++ LIBBPF_OPTS(perf_buffer_raw_opts, inner_opts, ++ .cpu_cnt = opts->cpu_cnt, ++ .cpus = opts->cpus, ++ .map_keys = opts->map_keys, ++ ); ++ ++ return perf_buffer__new_raw_v0_6_0(map_fd, page_cnt, opts->attr, ++ opts->event_cb, opts->ctx, &inner_opts); ++} ++ + static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, + struct perf_buffer_params *p) + { +--- a/tools/lib/bpf/libbpf.h ++++ b/tools/lib/bpf/libbpf.h +@@ -728,18 +728,52 @@ typedef void (*perf_buffer_lost_fn)(void + + /* common use perf buffer options */ + struct perf_buffer_opts { +- /* if specified, sample_cb is called for each sample */ +- perf_buffer_sample_fn sample_cb; +- /* if specified, lost_cb is called for each batch of lost samples */ +- perf_buffer_lost_fn lost_cb; +- /* ctx is provided to sample_cb and lost_cb */ +- void *ctx; ++ union { ++ size_t sz; ++ struct { /* DEPRECATED: will be removed in v1.0 */ ++ /* if specified, sample_cb is called for each sample */ ++ perf_buffer_sample_fn sample_cb; ++ /* if specified, lost_cb is called for each batch of lost samples */ ++ perf_buffer_lost_fn lost_cb; ++ /* ctx is provided to sample_cb and lost_cb */ ++ void *ctx; ++ }; ++ }; + }; ++#define perf_buffer_opts__last_field sz + ++/** ++ * @brief **perf_buffer__new()** creates BPF perfbuf manager for a specified ++ * BPF_PERF_EVENT_ARRAY map ++ * @param map_fd FD of BPF_PERF_EVENT_ARRAY BPF map that will be used by BPF ++ * code to send data over to user-space ++ * @param page_cnt number of memory pages allocated for each per-CPU buffer ++ * @param sample_cb function called on each received data record ++ * @param lost_cb function called when record loss has occurred ++ * @param ctx user-provided extra context passed into *sample_cb* and *lost_cb* ++ * @return a new instance of struct perf_buffer on success, NULL on error with ++ * *errno* containing an error code ++ */ + LIBBPF_API struct perf_buffer * + perf_buffer__new(int map_fd, size_t page_cnt, ++ perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx, + const struct perf_buffer_opts *opts); + ++LIBBPF_API struct perf_buffer * ++perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, ++ perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx, ++ const struct perf_buffer_opts *opts); ++ ++LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new() instead") ++struct 
perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt, ++ const struct perf_buffer_opts *opts); ++ ++#define perf_buffer__new(...) ___libbpf_overload(___perf_buffer_new, __VA_ARGS__) ++#define ___perf_buffer_new6(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) \ ++ perf_buffer__new(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) ++#define ___perf_buffer_new3(map_fd, page_cnt, opts) \ ++ perf_buffer__new_deprecated(map_fd, page_cnt, opts) ++ + enum bpf_perf_event_ret { + LIBBPF_PERF_EVENT_DONE = 0, + LIBBPF_PERF_EVENT_ERROR = -1, +@@ -753,12 +787,21 @@ typedef enum bpf_perf_event_ret + + /* raw perf buffer options, giving most power and control */ + struct perf_buffer_raw_opts { +- /* perf event attrs passed directly into perf_event_open() */ +- struct perf_event_attr *attr; +- /* raw event callback */ +- perf_buffer_event_fn event_cb; +- /* ctx is provided to event_cb */ +- void *ctx; ++ union { ++ struct { ++ size_t sz; ++ long :0; ++ long :0; ++ }; ++ struct { /* DEPRECATED: will be removed in v1.0 */ ++ /* perf event attrs passed directly into perf_event_open() */ ++ struct perf_event_attr *attr; ++ /* raw event callback */ ++ perf_buffer_event_fn event_cb; ++ /* ctx is provided to event_cb */ ++ void *ctx; ++ }; ++ }; + /* if cpu_cnt == 0, open all on all possible CPUs (up to the number of + * max_entries of given PERF_EVENT_ARRAY map) + */ +@@ -768,11 +811,28 @@ struct perf_buffer_raw_opts { + /* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */ + int *map_keys; + }; ++#define perf_buffer_raw_opts__last_field map_keys + + LIBBPF_API struct perf_buffer * +-perf_buffer__new_raw(int map_fd, size_t page_cnt, ++perf_buffer__new_raw(int map_fd, size_t page_cnt, struct perf_event_attr *attr, ++ perf_buffer_event_fn event_cb, void *ctx, + const struct perf_buffer_raw_opts *opts); + ++LIBBPF_API struct perf_buffer * ++perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, struct perf_event_attr *attr, ++ perf_buffer_event_fn event_cb, void *ctx, ++ const struct perf_buffer_raw_opts *opts); ++ ++LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new_raw() instead") ++struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt, ++ const struct perf_buffer_raw_opts *opts); ++ ++#define perf_buffer__new_raw(...) 
___libbpf_overload(___perf_buffer_new_raw, __VA_ARGS__) ++#define ___perf_buffer_new_raw6(map_fd, page_cnt, attr, event_cb, ctx, opts) \ ++ perf_buffer__new_raw(map_fd, page_cnt, attr, event_cb, ctx, opts) ++#define ___perf_buffer_new_raw3(map_fd, page_cnt, opts) \ ++ perf_buffer__new_raw_deprecated(map_fd, page_cnt, opts) ++ + LIBBPF_API void perf_buffer__free(struct perf_buffer *pb); + LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb); + LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms); +--- a/tools/lib/bpf/libbpf.map ++++ b/tools/lib/bpf/libbpf.map +@@ -409,4 +409,8 @@ LIBBPF_0.6.0 { + btf__type_cnt; + btf_dump__new; + btf_dump__new_deprecated; ++ perf_buffer__new; ++ perf_buffer__new_deprecated; ++ perf_buffer__new_raw; ++ perf_buffer__new_raw_deprecated; + } LIBBPF_0.5.0; diff --git a/patches.suse/libbpf-Mark-bpf_object__find_program_by_title-API-de.patch b/patches.suse/libbpf-Mark-bpf_object__find_program_by_title-API-de.patch new file mode 100644 index 0000000..517190d --- /dev/null +++ b/patches.suse/libbpf-Mark-bpf_object__find_program_by_title-API-de.patch @@ -0,0 +1,32 @@ +From: Kui-Feng Lee +Date: Mon, 13 Dec 2021 19:59:31 -0800 +Subject: libbpf: Mark bpf_object__find_program_by_title API deprecated. +Patch-mainline: v5.17-rc1 +Git-commit: 0da2596f343c0b234344ec47d51cbce3bde23dea +References: jsc#PED-1368 + +Deprecate this API since v0.7. All callers should move to +bpf_object__find_program_by_name if possible, otherwise use +bpf_object__for_each_program to find a program from a given +section. + +[0] Closes: https://github.com/libbpf/libbpf/issues/292 + +Signed-off-by: Kui-Feng Lee +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211214035931.1148209-5-kuifeng@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/tools/lib/bpf/libbpf.h ++++ b/tools/lib/bpf/libbpf.h +@@ -226,6 +226,7 @@ struct btf; + LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj); + LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); + ++LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__find_program_by_name() instead") + LIBBPF_API struct bpf_program * + bpf_object__find_program_by_title(const struct bpf_object *obj, + const char *title); diff --git a/patches.suse/libbpf-Normalize-PT_REGS_xxx-macro-definitions.patch b/patches.suse/libbpf-Normalize-PT_REGS_xxx-macro-definitions.patch new file mode 100644 index 0000000..5f14c68 --- /dev/null +++ b/patches.suse/libbpf-Normalize-PT_REGS_xxx-macro-definitions.patch @@ -0,0 +1,466 @@ +From: Andrii Nakryiko +Date: Wed, 22 Dec 2021 13:39:23 -0800 +Subject: libbpf: Normalize PT_REGS_xxx() macro definitions +Patch-mainline: v5.17-rc1 +Git-commit: 3cc31d794097a0de5ac619d4a20b1975139e6b05 +References: jsc#PED-1368 + +Refactor the PT_REGS macro definitions in bpf_tracing.h to avoid excessive +duplication. We currently have classic PT_REGS_xxx() and CO-RE-enabled +PT_REGS_xxx_CORE(). We are about to also add _SYSCALL variants, which +would require excessive copying of all the per-architecture definitions. + +Instead, separate architecture-specific field/register names from the +final macros that utilize them. That way for upcoming _SYSCALL variants +we'll be able to define just the x86_64 exception and otherwise have one +set of _SYSCALL macro definitions common to all architectures.
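+
+To illustrate the scheme (an explanatory sketch for this changelog,
+built from definitions visible in the diff below): each architecture now
+only provides register names, e.g. on x86-64
+
+	#define __PT_PARM1_REG rdi
+
+and a single common definition such as
+
+	#define PT_REGS_PARM1(x) (__PT_REGS_CAST(x)->__PT_PARM1_REG)
+
+expands to ((x)->rdi), while PT_REGS_PARM1_CORE(x) reuses the very same
+register name through BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM1_REG).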
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Tested-by: Ilya Leoshkevich +Acked-by: Yonghong Song +Acked-by: Ilya Leoshkevich +Link: https://lore.kernel.org/bpf/20211222213924.1869758-1-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf_tracing.h | 373 +++++++++++++++++--------------------------- + 1 file changed, 150 insertions(+), 223 deletions(-) + +--- a/tools/lib/bpf/bpf_tracing.h ++++ b/tools/lib/bpf/bpf_tracing.h +@@ -66,277 +66,204 @@ + + #if defined(__KERNEL__) || defined(__VMLINUX_H__) + +-#define PT_REGS_PARM1(x) ((x)->di) +-#define PT_REGS_PARM2(x) ((x)->si) +-#define PT_REGS_PARM3(x) ((x)->dx) +-#define PT_REGS_PARM4(x) ((x)->cx) +-#define PT_REGS_PARM5(x) ((x)->r8) +-#define PT_REGS_RET(x) ((x)->sp) +-#define PT_REGS_FP(x) ((x)->bp) +-#define PT_REGS_RC(x) ((x)->ax) +-#define PT_REGS_SP(x) ((x)->sp) +-#define PT_REGS_IP(x) ((x)->ip) +- +-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di) +-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si) +-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx) +-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx) +-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) +-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp) +-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp) +-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax) +-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) +-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip) ++#define __PT_PARM1_REG di ++#define __PT_PARM2_REG si ++#define __PT_PARM3_REG dx ++#define __PT_PARM4_REG cx ++#define __PT_PARM5_REG r8 ++#define __PT_RET_REG sp ++#define __PT_FP_REG bp ++#define __PT_RC_REG ax ++#define __PT_SP_REG sp ++#define __PT_IP_REG ip + + #else + + #ifdef __i386__ +-/* i386 kernel is built with -mregparm=3 */ +-#define PT_REGS_PARM1(x) ((x)->eax) +-#define PT_REGS_PARM2(x) ((x)->edx) +-#define PT_REGS_PARM3(x) ((x)->ecx) +-#define PT_REGS_PARM4(x) 0 +-#define PT_REGS_PARM5(x) 0 +-#define PT_REGS_RET(x) ((x)->esp) +-#define PT_REGS_FP(x) ((x)->ebp) +-#define PT_REGS_RC(x) ((x)->eax) +-#define PT_REGS_SP(x) ((x)->esp) +-#define PT_REGS_IP(x) ((x)->eip) +- +-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax) +-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx) +-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx) +-#define PT_REGS_PARM4_CORE(x) 0 +-#define PT_REGS_PARM5_CORE(x) 0 +-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp) +-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp) +-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax) +-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp) +-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip) + +-#else ++#define __PT_PARM1_REG eax ++#define __PT_PARM2_REG edx ++#define __PT_PARM3_REG ecx ++/* i386 kernel is built with -mregparm=3 */ ++#define __PT_PARM4_REG __unsupported__ ++#define __PT_PARM5_REG __unsupported__ ++#define __PT_RET_REG esp ++#define __PT_FP_REG ebp ++#define __PT_RC_REG eax ++#define __PT_SP_REG esp ++#define __PT_IP_REG eip ++ ++#else /* __i386__ */ ++ ++#define __PT_PARM1_REG rdi ++#define __PT_PARM2_REG rsi ++#define __PT_PARM3_REG rdx ++#define __PT_PARM4_REG rcx ++#define __PT_PARM5_REG r8 ++#define __PT_RET_REG rsp ++#define __PT_FP_REG rbp ++#define __PT_RC_REG rax ++#define __PT_SP_REG rsp ++#define __PT_IP_REG rip + +-#define PT_REGS_PARM1(x) ((x)->rdi) +-#define PT_REGS_PARM2(x) ((x)->rsi) +-#define PT_REGS_PARM3(x) ((x)->rdx) +-#define PT_REGS_PARM4(x) ((x)->rcx) +-#define PT_REGS_PARM5(x) ((x)->r8) +-#define PT_REGS_RET(x) ((x)->rsp) +-#define PT_REGS_FP(x) 
((x)->rbp) +-#define PT_REGS_RC(x) ((x)->rax) +-#define PT_REGS_SP(x) ((x)->rsp) +-#define PT_REGS_IP(x) ((x)->rip) +- +-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi) +-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi) +-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx) +-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx) +-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) +-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp) +-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp) +-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax) +-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp) +-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip) ++#endif /* __i386__ */ + +-#endif +-#endif ++#endif /* __KERNEL__ || __VMLINUX_H__ */ + + #elif defined(bpf_target_s390) + + /* s390 provides user_pt_regs instead of struct pt_regs to userspace */ +-struct pt_regs; +-#define PT_REGS_S390 const volatile user_pt_regs +-#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) +-#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) +-#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) +-#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) +-#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) +-#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) +-/* Works only with CONFIG_FRAME_POINTER */ +-#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) +-#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) +-#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) +-#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) +- +-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) +-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3]) +-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4]) +-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5]) +-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6]) +-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[14]) +-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11]) +-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) +-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15]) +-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), psw.addr) ++#define __PT_REGS_CAST(x) ((const user_pt_regs *)(x)) ++#define __PT_PARM1_REG gprs[2] ++#define __PT_PARM2_REG gprs[3] ++#define __PT_PARM3_REG gprs[4] ++#define __PT_PARM4_REG gprs[5] ++#define __PT_PARM5_REG gprs[6] ++#define __PT_RET_REG grps[14] ++#define __PT_FP_REG gprs[11] /* Works only with CONFIG_FRAME_POINTER */ ++#define __PT_RC_REG gprs[2] ++#define __PT_SP_REG gprs[15] ++#define __PT_IP_REG psw.addr + + #elif defined(bpf_target_arm) + +-#define PT_REGS_PARM1(x) ((x)->uregs[0]) +-#define PT_REGS_PARM2(x) ((x)->uregs[1]) +-#define PT_REGS_PARM3(x) ((x)->uregs[2]) +-#define PT_REGS_PARM4(x) ((x)->uregs[3]) +-#define PT_REGS_PARM5(x) ((x)->uregs[4]) +-#define PT_REGS_RET(x) ((x)->uregs[14]) +-#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ +-#define PT_REGS_RC(x) ((x)->uregs[0]) +-#define PT_REGS_SP(x) ((x)->uregs[13]) +-#define PT_REGS_IP(x) ((x)->uregs[12]) +- +-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0]) +-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1]) +-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2]) +-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3]) +-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4]) +-#define 
PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14]) +-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11]) +-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0]) +-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13]) +-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12]) ++#define __PT_PARM1_REG uregs[0] ++#define __PT_PARM2_REG uregs[1] ++#define __PT_PARM3_REG uregs[2] ++#define __PT_PARM4_REG uregs[3] ++#define __PT_PARM5_REG uregs[4] ++#define __PT_RET_REG uregs[14] ++#define __PT_FP_REG uregs[11] /* Works only with CONFIG_FRAME_POINTER */ ++#define __PT_RC_REG uregs[0] ++#define __PT_SP_REG uregs[13] ++#define __PT_IP_REG uregs[12] + + #elif defined(bpf_target_arm64) + + /* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ +-struct pt_regs; +-#define PT_REGS_ARM64 const volatile struct user_pt_regs +-#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +-#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) +-#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2]) +-#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) +-#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) +-#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) +-/* Works only with CONFIG_FRAME_POINTER */ +-#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) +-#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +-#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) +-#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) +- +-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) +-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1]) +-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2]) +-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3]) +-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4]) +-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30]) +-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29]) +-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) +-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp) +-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc) ++#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x)) ++#define __PT_PARM1_REG regs[0] ++#define __PT_PARM2_REG regs[1] ++#define __PT_PARM3_REG regs[2] ++#define __PT_PARM4_REG regs[3] ++#define __PT_PARM5_REG regs[4] ++#define __PT_RET_REG regs[30] ++#define __PT_FP_REG regs[29] /* Works only with CONFIG_FRAME_POINTER */ ++#define __PT_RC_REG regs[0] ++#define __PT_SP_REG sp ++#define __PT_IP_REG pc + + #elif defined(bpf_target_mips) + +-#define PT_REGS_PARM1(x) ((x)->regs[4]) +-#define PT_REGS_PARM2(x) ((x)->regs[5]) +-#define PT_REGS_PARM3(x) ((x)->regs[6]) +-#define PT_REGS_PARM4(x) ((x)->regs[7]) +-#define PT_REGS_PARM5(x) ((x)->regs[8]) +-#define PT_REGS_RET(x) ((x)->regs[31]) +-#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ +-#define PT_REGS_RC(x) ((x)->regs[2]) +-#define PT_REGS_SP(x) ((x)->regs[29]) +-#define PT_REGS_IP(x) ((x)->cp0_epc) +- +-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4]) +-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5]) +-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6]) +-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7]) +-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8]) +-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31]) +-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), 
regs[30]) +-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[2]) +-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29]) +-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc) ++#define __PT_PARM1_REG regs[4] ++#define __PT_PARM2_REG regs[5] ++#define __PT_PARM3_REG regs[6] ++#define __PT_PARM4_REG regs[7] ++#define __PT_PARM5_REG regs[8] ++#define __PT_RET_REG regs[31] ++#define __PT_FP_REG regs[30] /* Works only with CONFIG_FRAME_POINTER */ ++#define __PT_RC_REG regs[2] ++#define __PT_SP_REG regs[29] ++#define __PT_IP_REG cp0_epc + + #elif defined(bpf_target_powerpc) + +-#define PT_REGS_PARM1(x) ((x)->gpr[3]) +-#define PT_REGS_PARM2(x) ((x)->gpr[4]) +-#define PT_REGS_PARM3(x) ((x)->gpr[5]) +-#define PT_REGS_PARM4(x) ((x)->gpr[6]) +-#define PT_REGS_PARM5(x) ((x)->gpr[7]) +-#define PT_REGS_RC(x) ((x)->gpr[3]) +-#define PT_REGS_SP(x) ((x)->sp) +-#define PT_REGS_IP(x) ((x)->nip) +- +-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3]) +-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4]) +-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5]) +-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6]) +-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7]) +-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3]) +-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) +-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip) ++#define __PT_PARM1_REG gpr[3] ++#define __PT_PARM2_REG gpr[4] ++#define __PT_PARM3_REG gpr[5] ++#define __PT_PARM4_REG gpr[6] ++#define __PT_PARM5_REG gpr[7] ++#define __PT_RET_REG regs[31] ++#define __PT_FP_REG __unsupported__ ++#define __PT_RC_REG gpr[3] ++#define __PT_SP_REG sp ++#define __PT_IP_REG nip + + #elif defined(bpf_target_sparc) + +-#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) +-#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) +-#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) +-#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) +-#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) +-#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) +-#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) +-#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) +- +-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) +-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1]) +-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2]) +-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3]) +-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4]) +-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7]) +-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) +-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP]) +- ++#define __PT_PARM1_REG u_regs[UREG_I0] ++#define __PT_PARM2_REG u_regs[UREG_I1] ++#define __PT_PARM3_REG u_regs[UREG_I2] ++#define __PT_PARM4_REG u_regs[UREG_I3] ++#define __PT_PARM5_REG u_regs[UREG_I4] ++#define __PT_RET_REG u_regs[UREG_I7] ++#define __PT_FP_REG __unsupported__ ++#define __PT_RC_REG u_regs[UREG_I0] ++#define __PT_SP_REG u_regs[UREG_FP] + /* Should this also be a bpf_target check for the sparc case? 
*/ + #if defined(__arch64__) +-#define PT_REGS_IP(x) ((x)->tpc) +-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc) ++#define __PT_IP_REG tpc + #else +-#define PT_REGS_IP(x) ((x)->pc) +-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc) ++#define __PT_IP_REG pc + #endif + + #elif defined(bpf_target_riscv) + ++#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x)) ++#define __PT_PARM1_REG a0 ++#define __PT_PARM2_REG a1 ++#define __PT_PARM3_REG a2 ++#define __PT_PARM4_REG a3 ++#define __PT_PARM5_REG a4 ++#define __PT_RET_REG ra ++#define __PT_FP_REG fp ++#define __PT_RC_REG a5 ++#define __PT_SP_REG sp ++#define __PT_IP_REG epc ++ ++#endif ++ ++#if defined(bpf_target_defined) ++ + struct pt_regs; +-#define PT_REGS_RV const volatile struct user_regs_struct +-#define PT_REGS_PARM1(x) (((PT_REGS_RV *)(x))->a0) +-#define PT_REGS_PARM2(x) (((PT_REGS_RV *)(x))->a1) +-#define PT_REGS_PARM3(x) (((PT_REGS_RV *)(x))->a2) +-#define PT_REGS_PARM4(x) (((PT_REGS_RV *)(x))->a3) +-#define PT_REGS_PARM5(x) (((PT_REGS_RV *)(x))->a4) +-#define PT_REGS_RET(x) (((PT_REGS_RV *)(x))->ra) +-#define PT_REGS_FP(x) (((PT_REGS_RV *)(x))->s5) +-#define PT_REGS_RC(x) (((PT_REGS_RV *)(x))->a5) +-#define PT_REGS_SP(x) (((PT_REGS_RV *)(x))->sp) +-#define PT_REGS_IP(x) (((PT_REGS_RV *)(x))->epc) +- +-#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a0) +-#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a1) +-#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a2) +-#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a3) +-#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a4) +-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), ra) +-#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), fp) +-#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a5) +-#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), sp) +-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), epc) + ++/* allow some architecutres to override `struct pt_regs` */ ++#ifndef __PT_REGS_CAST ++#define __PT_REGS_CAST(x) (x) + #endif + ++#define PT_REGS_PARM1(x) (__PT_REGS_CAST(x)->__PT_PARM1_REG) ++#define PT_REGS_PARM2(x) (__PT_REGS_CAST(x)->__PT_PARM2_REG) ++#define PT_REGS_PARM3(x) (__PT_REGS_CAST(x)->__PT_PARM3_REG) ++#define PT_REGS_PARM4(x) (__PT_REGS_CAST(x)->__PT_PARM4_REG) ++#define PT_REGS_PARM5(x) (__PT_REGS_CAST(x)->__PT_PARM5_REG) ++#define PT_REGS_RET(x) (__PT_REGS_CAST(x)->__PT_RET_REG) ++#define PT_REGS_FP(x) (__PT_REGS_CAST(x)->__PT_FP_REG) ++#define PT_REGS_RC(x) (__PT_REGS_CAST(x)->__PT_RC_REG) ++#define PT_REGS_SP(x) (__PT_REGS_CAST(x)->__PT_SP_REG) ++#define PT_REGS_IP(x) (__PT_REGS_CAST(x)->__PT_IP_REG) ++ ++#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM1_REG) ++#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM2_REG) ++#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_REG) ++#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_REG) ++#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_REG) ++#define PT_REGS_RET_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RET_REG) ++#define PT_REGS_FP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_FP_REG) ++#define PT_REGS_RC_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RC_REG) ++#define PT_REGS_SP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_SP_REG) ++#define PT_REGS_IP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_IP_REG) ++ + #if defined(bpf_target_powerpc) ++ + #define 
BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) + #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP ++ + #elif defined(bpf_target_sparc) ++ + #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) + #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP +-#elif defined(bpf_target_defined) ++ ++#else ++ + #define BPF_KPROBE_READ_RET_IP(ip, ctx) \ + ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) + #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \ +- ({ bpf_probe_read_kernel(&(ip), sizeof(ip), \ +- (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) ++ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) ++ + #endif + +-#if !defined(bpf_target_defined) ++#else /* defined(bpf_target_defined) */ + + #define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) + #define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +@@ -363,7 +290,7 @@ struct pt_regs; + #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) + #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) + +-#endif /* !defined(bpf_target_defined) */ ++#endif /* defined(bpf_target_defined) */ + + #ifndef ___bpf_concat + #define ___bpf_concat(a, b) a ## b diff --git a/patches.suse/libbpf-Pass-number-of-prog-load-attempts-explicitly.patch b/patches.suse/libbpf-Pass-number-of-prog-load-attempts-explicitly.patch new file mode 100644 index 0000000..f4d55d4 --- /dev/null +++ b/patches.suse/libbpf-Pass-number-of-prog-load-attempts-explicitly.patch @@ -0,0 +1,76 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 15:08:35 -0700 +Subject: libbpf: Pass number of prog load attempts explicitly +Patch-mainline: v5.17-rc1 +Git-commit: 45493cbaf59e3c9482e0e6a2646b362fff45db8b +References: jsc#PED-1368 + +Allow callers to control the number of BPF_PROG_LOAD attempts from outside the +sys_bpf_prog_load() helper.
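+
+For illustration (a sketch for this changelog; the constant comes from
+the diff below), internal callers now spell the retry budget explicitly:
+
+	/* retry transient EAGAIN failures up to the given number of times */
+	fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS);
+
+and a hypothetical caller that tolerates only a single attempt could
+pass 1 instead.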
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Dave Marchevsky +Link: https://lore.kernel.org/bpf/20211103220845.2676888-3-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +--- a/tools/lib/bpf/bpf.c ++++ b/tools/lib/bpf/bpf.c +@@ -74,14 +74,15 @@ static inline int sys_bpf_fd(enum bpf_cm + return ensure_good_fd(fd); + } + +-static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) ++#define PROG_LOAD_ATTEMPTS 5 ++ ++static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) + { +- int retries = 5; + int fd; + + do { + fd = sys_bpf_fd(BPF_PROG_LOAD, attr, size); +- } while (fd < 0 && errno == EAGAIN && retries-- > 0); ++ } while (fd < 0 && errno == EAGAIN && --attempts > 0); + + return fd; + } +@@ -304,7 +305,7 @@ int libbpf__bpf_prog_load(const struct b + memcpy(attr.prog_name, load_attr->name, + min(strlen(load_attr->name), (size_t)BPF_OBJ_NAME_LEN - 1)); + +- fd = sys_bpf_prog_load(&attr, sizeof(attr)); ++ fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS); + if (fd >= 0) + return fd; + +@@ -345,7 +346,7 @@ int libbpf__bpf_prog_load(const struct b + break; + } + +- fd = sys_bpf_prog_load(&attr, sizeof(attr)); ++ fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS); + if (fd >= 0) + goto done; + } +@@ -359,7 +360,7 @@ int libbpf__bpf_prog_load(const struct b + attr.log_level = 1; + load_attr->log_buf[0] = 0; + +- fd = sys_bpf_prog_load(&attr, sizeof(attr)); ++ fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS); + done: + /* free() doesn't affect errno, so we don't need to restore it */ + free(finfo); +@@ -449,7 +450,7 @@ int bpf_verify_program(enum bpf_prog_typ + attr.kern_version = kern_version; + attr.prog_flags = prog_flags; + +- fd = sys_bpf_prog_load(&attr, sizeof(attr)); ++ fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS); + return libbpf_err_errno(fd); + } + diff --git a/patches.suse/libbpf-Preserve-kernel-error-code-and-remove-kprobe-.patch b/patches.suse/libbpf-Preserve-kernel-error-code-and-remove-kprobe-.patch new file mode 100644 index 0000000..c875ed1 --- /dev/null +++ b/patches.suse/libbpf-Preserve-kernel-error-code-and-remove-kprobe-.patch @@ -0,0 +1,66 @@ +From: Andrii Nakryiko +Date: Thu, 9 Dec 2021 11:38:34 -0800 +Subject: libbpf: Preserve kernel error code and remove kprobe prog type + guessing +Patch-mainline: v5.17-rc1 +Git-commit: 2eda2145ebfc76569fd088f46356203fc0c785a1 +References: jsc#PED-1368 + +Instead of rewriting the error code returned by the kernel during prog load +with libbpf-specific variants, pass through the original error. + +There is now also no need for the generic -LIBBPF_ERRNO__LOAD +fallback error, as bpf_prog_load() guarantees that errno will be properly +set no matter what. + +Also drop the completely outdated and pretty useless BPF_PROG_TYPE_KPROBE +guessing logic. It's not necessary, nor is it helpful in modern BPF +applications. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211209193840.1248570-7-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 19 ++----------------- + 1 file changed, 2 insertions(+), 17 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -6696,34 +6696,19 @@ retry_load: + free(log_buf); + goto retry_load; + } +- ret = errno ?
-errno : -LIBBPF_ERRNO__LOAD; ++ ++ ret = -errno; + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); + pr_perm_msg(ret); + + if (log_buf && log_buf[0] != '\0') { +- ret = -LIBBPF_ERRNO__VERIFY; + pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", + prog->name, log_buf); + } + if (insns_cnt >= BPF_MAXINSNS) { + pr_warn("prog '%s': program too large (%d insns), at most %d insns\n", + prog->name, insns_cnt, BPF_MAXINSNS); +- ret = -LIBBPF_ERRNO__PROG2BIG; +- } else if (prog->type != BPF_PROG_TYPE_KPROBE) { +- /* Wrong program type? */ +- int fd; +- +- load_attr.expected_attach_type = 0; +- load_attr.log_buf = NULL; +- load_attr.log_size = 0; +- fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, prog_name, license, +- insns, insns_cnt, &load_attr); +- if (fd >= 0) { +- close(fd); +- ret = -LIBBPF_ERRNO__PROGTYPE; +- goto out; +- } + } + + out: diff --git a/patches.suse/libbpf-Prevent-deprecation-warnings-in-xsk.c.patch b/patches.suse/libbpf-Prevent-deprecation-warnings-in-xsk.c.patch new file mode 100644 index 0000000..d4214a7 --- /dev/null +++ b/patches.suse/libbpf-Prevent-deprecation-warnings-in-xsk.c.patch @@ -0,0 +1,35 @@ +From: Andrii Nakryiko +Date: Wed, 24 Nov 2021 11:32:32 -0800 +Subject: libbpf: Prevent deprecation warnings in xsk.c +Patch-mainline: v5.17-rc1 +Git-commit: 99a12a32fee4f740af2f36bb8f64e11c026f3389 +References: jsc#PED-1368 + +xsk.c is using its own APIs that are marked for deprecation internally. +Given xsk.c and xsk.h will be gone in libbpf 1.0, there is no reason to +do a public vs internal function split just to avoid deprecation warnings. +So just add a pragma to silence deprecation warnings (until the code is +removed completely). + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211124193233.3115996-4-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/xsk.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/tools/lib/bpf/xsk.c ++++ b/tools/lib/bpf/xsk.c +@@ -35,6 +35,11 @@ + #include "libbpf_internal.h" + #include "xsk.h" + ++/* entire xsk.h and xsk.c is going away in libbpf 1.0, so ignore all internal ++ * uses of deprecated APIs ++ */ ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ + #ifndef SOL_XDP + #define SOL_XDP 283 + #endif diff --git a/patches.suse/libbpf-Reduce-bpf_core_apply_relo_insn-stack-usage.patch b/patches.suse/libbpf-Reduce-bpf_core_apply_relo_insn-stack-usage.patch new file mode 100644 index 0000000..d3d551d --- /dev/null +++ b/patches.suse/libbpf-Reduce-bpf_core_apply_relo_insn-stack-usage.patch @@ -0,0 +1,251 @@ +From: Alexei Starovoitov +Date: Fri, 3 Dec 2021 10:28:36 -0800 +Subject: libbpf: Reduce bpf_core_apply_relo_insn() stack usage. +Patch-mainline: v5.17-rc1 +Git-commit: 78c1f8d0634cc35da613d844eda7c849fc50f643 +References: jsc#PED-1368 + +Reduce bpf_core_apply_relo_insn() stack usage and bump +BPF_CORE_SPEC_MAX_LEN limit back to 64.
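+
+The key change, sketched here for this changelog from the diff below:
+callers now provide scratch memory for the three CO-RE specs instead of
+having them live on the stack of bpf_core_apply_relo_insn():
+
+	struct bpf_core_spec specs_scratch[3] = {};
+
+	err = bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo,
+				       relo_idx, local_btf, cands,
+				       specs_scratch);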
+ +Fixes: 29db4bea1d10 ("bpf: Prepare relo_core.c for kernel duty.") +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211203182836.16646-1-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + kernel/bpf/btf.c | 11 +++++++- + tools/lib/bpf/libbpf.c | 4 ++ + tools/lib/bpf/relo_core.c | 62 ++++++++++++++-------------------------------- + tools/lib/bpf/relo_core.h | 30 +++++++++++++++++++++- + 4 files changed, 61 insertions(+), 46 deletions(-) + +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -6735,8 +6735,16 @@ int bpf_core_apply(struct bpf_core_ctx * + { + bool need_cands = relo->kind != BPF_CORE_TYPE_ID_LOCAL; + struct bpf_core_cand_list cands = {}; ++ struct bpf_core_spec *specs; + int err; + ++ /* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5" ++ * into arrays of btf_ids of struct fields and array indices. ++ */ ++ specs = kcalloc(3, sizeof(*specs), GFP_KERNEL); ++ if (!specs) ++ return -ENOMEM; ++ + if (need_cands) { + struct bpf_cand_cache *cc; + int i; +@@ -6772,8 +6780,9 @@ int bpf_core_apply(struct bpf_core_ctx * + } + + err = bpf_core_apply_relo_insn((void *)ctx->log, insn, relo->insn_off / 8, +- relo, relo_idx, ctx->btf, &cands); ++ relo, relo_idx, ctx->btf, &cands, specs); + out: ++ kfree(specs); + if (need_cands) { + kfree(cands.cands); + mutex_unlock(&cand_cache_mutex); +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -5515,6 +5515,7 @@ static int bpf_core_apply_relo(struct bp + const struct btf *local_btf, + struct hashmap *cand_cache) + { ++ struct bpf_core_spec specs_scratch[3] = {}; + const void *type_key = u32_as_hash_key(relo->type_id); + struct bpf_core_cand_list *cands = NULL; + const char *prog_name = prog->name; +@@ -5569,7 +5570,8 @@ static int bpf_core_apply_relo(struct bp + } + } + +- return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands); ++ return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, ++ relo_idx, local_btf, cands, specs_scratch); + } + + static int +--- a/tools/lib/bpf/relo_core.c ++++ b/tools/lib/bpf/relo_core.c +@@ -68,33 +68,6 @@ enum libbpf_print_level { + #include "libbpf_internal.h" + #endif + +-#define BPF_CORE_SPEC_MAX_LEN 32 +- +-/* represents BPF CO-RE field or array element accessor */ +-struct bpf_core_accessor { +- __u32 type_id; /* struct/union type or array element type */ +- __u32 idx; /* field index or array index */ +- const char *name; /* field name or NULL for array accessor */ +-}; +- +-struct bpf_core_spec { +- const struct btf *btf; +- /* high-level spec: named fields and array indices only */ +- struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; +- /* original unresolved (no skip_mods_or_typedefs) root type ID */ +- __u32 root_type_id; +- /* CO-RE relocation kind */ +- enum bpf_core_relo_kind relo_kind; +- /* high-level spec length */ +- int len; +- /* raw, low-level spec: 1-to-1 with accessor spec string */ +- int raw_spec[BPF_CORE_SPEC_MAX_LEN]; +- /* raw spec length */ +- int raw_len; +- /* field bit offset represented by spec */ +- __u32 bit_offset; +-}; +- + static bool is_flex_arr(const struct btf *btf, + const struct bpf_core_accessor *acc, + const struct btf_array *arr) +@@ -1200,9 +1173,12 @@ int bpf_core_apply_relo_insn(const char + const struct bpf_core_relo *relo, + int relo_idx, + const struct btf *local_btf, +- struct bpf_core_cand_list *cands) ++ struct bpf_core_cand_list *cands, ++ struct bpf_core_spec *specs_scratch) + { +- struct bpf_core_spec local_spec, 
cand_spec, targ_spec = {}; ++ struct bpf_core_spec *local_spec = &specs_scratch[0]; ++ struct bpf_core_spec *cand_spec = &specs_scratch[1]; ++ struct bpf_core_spec *targ_spec = &specs_scratch[2]; + struct bpf_core_relo_res cand_res, targ_res; + const struct btf_type *local_type; + const char *local_name; +@@ -1221,7 +1197,7 @@ int bpf_core_apply_relo_insn(const char + return -EINVAL; + + err = bpf_core_parse_spec(prog_name, local_btf, local_id, spec_str, +- relo->kind, &local_spec); ++ relo->kind, local_spec); + if (err) { + pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", + prog_name, relo_idx, local_id, btf_kind_str(local_type), +@@ -1232,15 +1208,15 @@ int bpf_core_apply_relo_insn(const char + + pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, + relo_idx, core_relo_kind_str(relo->kind), relo->kind); +- bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, &local_spec); ++ bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, local_spec); + libbpf_print(LIBBPF_DEBUG, "\n"); + + /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ + if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { + targ_res.validate = true; + targ_res.poison = false; +- targ_res.orig_val = local_spec.root_type_id; +- targ_res.new_val = local_spec.root_type_id; ++ targ_res.orig_val = local_spec->root_type_id; ++ targ_res.new_val = local_spec->root_type_id; + goto patch_insn; + } + +@@ -1253,38 +1229,38 @@ int bpf_core_apply_relo_insn(const char + + + for (i = 0, j = 0; i < cands->len; i++) { +- err = bpf_core_spec_match(&local_spec, cands->cands[i].btf, +- cands->cands[i].id, &cand_spec); ++ err = bpf_core_spec_match(local_spec, cands->cands[i].btf, ++ cands->cands[i].id, cand_spec); + if (err < 0) { + pr_warn("prog '%s': relo #%d: error matching candidate #%d ", + prog_name, relo_idx, i); +- bpf_core_dump_spec(prog_name, LIBBPF_WARN, &cand_spec); ++ bpf_core_dump_spec(prog_name, LIBBPF_WARN, cand_spec); + libbpf_print(LIBBPF_WARN, ": %d\n", err); + return err; + } + + pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, + relo_idx, err == 0 ? 
"non-matching" : "matching", i); +- bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, &cand_spec); ++ bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, cand_spec); + libbpf_print(LIBBPF_DEBUG, "\n"); + + if (err == 0) + continue; + +- err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res); ++ err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, cand_spec, &cand_res); + if (err) + return err; + + if (j == 0) { + targ_res = cand_res; +- targ_spec = cand_spec; +- } else if (cand_spec.bit_offset != targ_spec.bit_offset) { ++ *targ_spec = *cand_spec; ++ } else if (cand_spec->bit_offset != targ_spec->bit_offset) { + /* if there are many field relo candidates, they + * should all resolve to the same bit offset + */ + pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", +- prog_name, relo_idx, cand_spec.bit_offset, +- targ_spec.bit_offset); ++ prog_name, relo_idx, cand_spec->bit_offset, ++ targ_spec->bit_offset); + return -EINVAL; + } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { + /* all candidates should result in the same relocation +@@ -1328,7 +1304,7 @@ int bpf_core_apply_relo_insn(const char + prog_name, relo_idx); + + /* calculate single target relo result explicitly */ +- err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res); ++ err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, &targ_res); + if (err) + return err; + } +--- a/tools/lib/bpf/relo_core.h ++++ b/tools/lib/bpf/relo_core.h +@@ -17,11 +17,39 @@ struct bpf_core_cand_list { + int len; + }; + ++#define BPF_CORE_SPEC_MAX_LEN 64 ++ ++/* represents BPF CO-RE field or array element accessor */ ++struct bpf_core_accessor { ++ __u32 type_id; /* struct/union type or array element type */ ++ __u32 idx; /* field index or array index */ ++ const char *name; /* field name or NULL for array accessor */ ++}; ++ ++struct bpf_core_spec { ++ const struct btf *btf; ++ /* high-level spec: named fields and array indices only */ ++ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; ++ /* original unresolved (no skip_mods_or_typedefs) root type ID */ ++ __u32 root_type_id; ++ /* CO-RE relocation kind */ ++ enum bpf_core_relo_kind relo_kind; ++ /* high-level spec length */ ++ int len; ++ /* raw, low-level spec: 1-to-1 with accessor spec string */ ++ int raw_spec[BPF_CORE_SPEC_MAX_LEN]; ++ /* raw spec length */ ++ int raw_len; ++ /* field bit offset represented by spec */ ++ __u32 bit_offset; ++}; ++ + int bpf_core_apply_relo_insn(const char *prog_name, + struct bpf_insn *insn, int insn_idx, + const struct bpf_core_relo *relo, int relo_idx, + const struct btf *local_btf, +- struct bpf_core_cand_list *cands); ++ struct bpf_core_cand_list *cands, ++ struct bpf_core_spec *specs_scratch); + int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id); + diff --git a/patches.suse/libbpf-Remove-deprecation-attribute-from-struct-bpf_.patch b/patches.suse/libbpf-Remove-deprecation-attribute-from-struct-bpf_.patch new file mode 100644 index 0000000..1c3aab2 --- /dev/null +++ b/patches.suse/libbpf-Remove-deprecation-attribute-from-struct-bpf_.patch @@ -0,0 +1,37 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 15:08:40 -0700 +Subject: libbpf: Remove deprecation attribute from struct bpf_prog_prep_result +Patch-mainline: v5.17-rc1 +Git-commit: 5c5edcdebfcf3a95257b0d8ef27a60af0e0ea03a +References: jsc#PED-1368 + +This deprecation annotation has no effect because for struct 
types, the deprecation +attribute has to be declared after the struct definition. But instead of +moving it to the end of the struct definition, remove it. When the deprecation +goes into effect in libbpf v0.7, this deprecation attribute will cause +libbpf's own source code compilation to trigger deprecation warnings, +which is unavoidable because libbpf still has to support that API. + +So keep the deprecation of APIs, but don't mark structs used in the API as +deprecated. + +Fixes: e21d585cb3db ("libbpf: Deprecate multi-instance bpf_program APIs") +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Dave Marchevsky +Link: https://lore.kernel.org/bpf/20211103220845.2676888-8-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.h | 1 - + 1 file changed, 1 deletion(-) + +--- a/tools/lib/bpf/libbpf.h ++++ b/tools/lib/bpf/libbpf.h +@@ -430,7 +430,6 @@ bpf_program__attach_iter(const struct bp + * one instance. In this case bpf_program__fd(prog) is equal to + * bpf_program__nth_fd(prog, 0). + */ +-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions") + struct bpf_prog_prep_result { + /* + * If not NULL, load new instruction array. diff --git a/patches.suse/libbpf-Remove-duplicate-assignments.patch b/patches.suse/libbpf-Remove-duplicate-assignments.patch new file mode 100644 index 0000000..3072006 --- /dev/null +++ b/patches.suse/libbpf-Remove-duplicate-assignments.patch @@ -0,0 +1,27 @@ +From: Mehrdad Arshad Rad +Date: Sun, 28 Nov 2021 11:33:37 -0800 +Subject: libbpf: Remove duplicate assignments +Patch-mainline: v5.17-rc1 +Git-commit: c291d0a4d169811898d723cfa5f1aa1fc60e607c +References: jsc#PED-1368 + +The same assignment is already made where load_attr.attach_btf_id is initialized. + +Signed-off-by: Mehrdad Arshad Rad +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211128193337.10628-1-arshad.rad@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -6559,7 +6559,6 @@ static int bpf_object_load_prog_instance + load_attr.expected_attach_type = prog->expected_attach_type; + if (kernel_supports(obj, FEAT_PROG_NAME)) + prog_name = prog->name; +- load_attr.attach_btf_id = prog->attach_btf_id; + load_attr.attach_prog_fd = prog->attach_prog_fd; + load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; + load_attr.attach_btf_id = prog->attach_btf_id; diff --git a/patches.suse/libbpf-Remove-internal-use-of-deprecated-bpf_prog_lo.patch b/patches.suse/libbpf-Remove-internal-use-of-deprecated-bpf_prog_lo.patch new file mode 100644 index 0000000..e94b1ea --- /dev/null +++ b/patches.suse/libbpf-Remove-internal-use-of-deprecated-bpf_prog_lo.patch @@ -0,0 +1,420 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 15:08:37 -0700 +Subject: libbpf: Remove internal use of deprecated bpf_prog_load() variants +Patch-mainline: v5.17-rc1 +Git-commit: e32660ac6fd6bd3c9d249644330d968c6ef61b07 +References: jsc#PED-1368 + +Remove all the internal uses of bpf_load_program_xattr(), which is +slated for deprecation in v0.7.
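+
+As a before/after sketch for this changelog (insns and insn_cnt are
+assumed to exist; the pattern matches the probe_* conversions below):
+
+	/* old, deprecated style */
+	struct bpf_load_program_attr attr = {};
+
+	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	attr.insns = insns;
+	attr.insns_cnt = insn_cnt;
+	attr.license = "GPL";
+	fd = bpf_load_program_xattr(&attr, NULL, 0);
+
+	/* new style used throughout this patch */
+	fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
+			   insns, insn_cnt, NULL);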
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211103220845.2676888-5-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf.c | 8 ++ + tools/lib/bpf/libbpf.c | 119 +++++++++++++----------------------------- + tools/lib/bpf/libbpf_probes.c | 20 +++---- + tools/lib/bpf/xsk.c | 34 +++--------- + 4 files changed, 64 insertions(+), 117 deletions(-) + +--- a/tools/lib/bpf/bpf.c ++++ b/tools/lib/bpf/bpf.c +@@ -401,8 +401,12 @@ done: + return libbpf_err_errno(fd); + } + ++__attribute__((alias("bpf_load_program_xattr2"))) + int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, +- char *log_buf, size_t log_buf_sz) ++ char *log_buf, size_t log_buf_sz); ++ ++static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr, ++ char *log_buf, size_t log_buf_sz) + { + LIBBPF_OPTS(bpf_prog_load_opts, p); + +@@ -456,7 +460,7 @@ int bpf_load_program(enum bpf_prog_type + load_attr.license = license; + load_attr.kern_version = kern_version; + +- return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz); ++ return bpf_load_program_xattr2(&load_attr, log_buf, log_buf_sz); + } + + int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -4282,30 +4282,20 @@ int bpf_map__resize(struct bpf_map *map, + static int + bpf_object__probe_loading(struct bpf_object *obj) + { +- struct bpf_load_program_attr attr; + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; +- int ret; ++ int ret, insn_cnt = ARRAY_SIZE(insns); + + if (obj->gen_loader) + return 0; + + /* make sure basic loading works */ +- +- memset(&attr, 0, sizeof(attr)); +- attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; +- attr.insns = insns; +- attr.insns_cnt = ARRAY_SIZE(insns); +- attr.license = "GPL"; +- +- ret = bpf_load_program_xattr(&attr, NULL, 0); +- if (ret < 0) { +- attr.prog_type = BPF_PROG_TYPE_TRACEPOINT; +- ret = bpf_load_program_xattr(&attr, NULL, 0); +- } ++ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); ++ if (ret < 0) ++ ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + if (ret < 0) { + ret = errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); +@@ -4329,28 +4319,19 @@ static int probe_fd(int fd) + + static int probe_kern_prog_name(void) + { +- struct bpf_load_program_attr attr; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; +- int ret; ++ int ret, insn_cnt = ARRAY_SIZE(insns); + + /* make sure loading with name works */ +- +- memset(&attr, 0, sizeof(attr)); +- attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; +- attr.insns = insns; +- attr.insns_cnt = ARRAY_SIZE(insns); +- attr.license = "GPL"; +- attr.name = "test"; +- ret = bpf_load_program_xattr(&attr, NULL, 0); ++ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL); + return probe_fd(ret); + } + + static int probe_kern_global_data(void) + { +- struct bpf_load_program_attr prg_attr; + struct bpf_create_map_attr map_attr; + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { +@@ -4359,7 +4340,7 @@ static int probe_kern_global_data(void) + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; +- int ret, map; ++ int ret, map, insn_cnt = ARRAY_SIZE(insns); + + memset(&map_attr, 0, sizeof(map_attr)); + map_attr.map_type = BPF_MAP_TYPE_ARRAY; +@@ -4378,13 +4359,7 @@ static 
int probe_kern_global_data(void) + + insns[0].imm = map; + +- memset(&prg_attr, 0, sizeof(prg_attr)); +- prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; +- prg_attr.insns = insns; +- prg_attr.insns_cnt = ARRAY_SIZE(insns); +- prg_attr.license = "GPL"; +- +- ret = bpf_load_program_xattr(&prg_attr, NULL, 0); ++ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + close(map); + return probe_fd(ret); + } +@@ -4500,30 +4475,24 @@ static int probe_kern_array_mmap(void) + + static int probe_kern_exp_attach_type(void) + { +- struct bpf_load_program_attr attr; ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; ++ int fd, insn_cnt = ARRAY_SIZE(insns); + +- memset(&attr, 0, sizeof(attr)); + /* use any valid combination of program type and (optional) + * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) + * to see if kernel supports expected_attach_type field for + * BPF_PROG_LOAD command + */ +- attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK; +- attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE; +- attr.insns = insns; +- attr.insns_cnt = ARRAY_SIZE(insns); +- attr.license = "GPL"; +- +- return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); ++ fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); ++ return probe_fd(fd); + } + + static int probe_kern_probe_read_kernel(void) + { +- struct bpf_load_program_attr attr; + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ +@@ -4532,26 +4501,21 @@ static int probe_kern_probe_read_kernel( + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), + BPF_EXIT_INSN(), + }; ++ int fd, insn_cnt = ARRAY_SIZE(insns); + +- memset(&attr, 0, sizeof(attr)); +- attr.prog_type = BPF_PROG_TYPE_KPROBE; +- attr.insns = insns; +- attr.insns_cnt = ARRAY_SIZE(insns); +- attr.license = "GPL"; +- +- return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); ++ fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); ++ return probe_fd(fd); + } + + static int probe_prog_bind_map(void) + { +- struct bpf_load_program_attr prg_attr; + struct bpf_create_map_attr map_attr; + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; +- int ret, map, prog; ++ int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); + + memset(&map_attr, 0, sizeof(map_attr)); + map_attr.map_type = BPF_MAP_TYPE_ARRAY; +@@ -4568,13 +4532,7 @@ static int probe_prog_bind_map(void) + return ret; + } + +- memset(&prg_attr, 0, sizeof(prg_attr)); +- prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; +- prg_attr.insns = insns; +- prg_attr.insns_cnt = ARRAY_SIZE(insns); +- prg_attr.license = "GPL"; +- +- prog = bpf_load_program_xattr(&prg_attr, NULL, 0); ++ prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + if (prog < 0) { + close(map); + return 0; +@@ -4619,19 +4577,14 @@ static int probe_module_btf(void) + + static int probe_perf_link(void) + { +- struct bpf_load_program_attr attr; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + +- memset(&attr, 0, sizeof(attr)); +- attr.prog_type = BPF_PROG_TYPE_TRACEPOINT; +- attr.insns = insns; +- attr.insns_cnt = ARRAY_SIZE(insns); +- attr.license = "GPL"; +- prog_fd = 
bpf_load_program_xattr(&attr, NULL, 0); ++ prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", ++ insns, ARRAY_SIZE(insns), NULL); + if (prog_fd < 0) + return -errno; + +@@ -9166,22 +9119,12 @@ long libbpf_get_error(const void *ptr) + return -errno; + } + +-COMPAT_VERSION(bpf_prog_load_deprecated, bpf_prog_load, LIBBPF_0.0.1) +-int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, +- struct bpf_object **pobj, int *prog_fd) +-{ +- struct bpf_prog_load_attr attr; +- +- memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); +- attr.file = file; +- attr.prog_type = type; +- attr.expected_attach_type = 0; +- +- return bpf_prog_load_xattr(&attr, pobj, prog_fd); +-} +- ++__attribute__((alias("bpf_prog_load_xattr2"))) + int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, +- struct bpf_object **pobj, int *prog_fd) ++ struct bpf_object **pobj, int *prog_fd); ++ ++static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, ++ struct bpf_object **pobj, int *prog_fd) + { + struct bpf_object_open_attr open_attr = {}; + struct bpf_program *prog, *first_prog = NULL; +@@ -9252,6 +9195,20 @@ int bpf_prog_load_xattr(const struct bpf + return 0; + } + ++COMPAT_VERSION(bpf_prog_load_deprecated, bpf_prog_load, LIBBPF_0.0.1) ++int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, ++ struct bpf_object **pobj, int *prog_fd) ++{ ++ struct bpf_prog_load_attr attr; ++ ++ memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); ++ attr.file = file; ++ attr.prog_type = type; ++ attr.expected_attach_type = 0; ++ ++ return bpf_prog_load_xattr2(&attr, pobj, prog_fd); ++} ++ + struct bpf_link { + int (*detach)(struct bpf_link *link); + void (*dealloc)(struct bpf_link *link); +--- a/tools/lib/bpf/libbpf_probes.c ++++ b/tools/lib/bpf/libbpf_probes.c +@@ -68,21 +68,21 @@ static void + probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, + size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex) + { +- struct bpf_load_program_attr xattr = {}; ++ LIBBPF_OPTS(bpf_prog_load_opts, opts); + int fd; + + switch (prog_type) { + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: +- xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT; ++ opts.expected_attach_type = BPF_CGROUP_INET4_CONNECT; + break; + case BPF_PROG_TYPE_CGROUP_SOCKOPT: +- xattr.expected_attach_type = BPF_CGROUP_GETSOCKOPT; ++ opts.expected_attach_type = BPF_CGROUP_GETSOCKOPT; + break; + case BPF_PROG_TYPE_SK_LOOKUP: +- xattr.expected_attach_type = BPF_SK_LOOKUP; ++ opts.expected_attach_type = BPF_SK_LOOKUP; + break; + case BPF_PROG_TYPE_KPROBE: +- xattr.kern_version = get_kernel_version(); ++ opts.kern_version = get_kernel_version(); + break; + case BPF_PROG_TYPE_UNSPEC: + case BPF_PROG_TYPE_SOCKET_FILTER: +@@ -115,13 +115,11 @@ probe_load(enum bpf_prog_type prog_type, + break; + } + +- xattr.prog_type = prog_type; +- xattr.insns = insns; +- xattr.insns_cnt = insns_cnt; +- xattr.license = "GPL"; +- xattr.prog_ifindex = ifindex; ++ opts.prog_ifindex = ifindex; ++ opts.log_buf = buf; ++ opts.log_size = buf_len; + +- fd = bpf_load_program_xattr(&xattr, buf, buf_len); ++ fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, NULL); + if (fd >= 0) + close(fd); + } +--- a/tools/lib/bpf/xsk.c ++++ b/tools/lib/bpf/xsk.c +@@ -364,7 +364,6 @@ int xsk_umem__create_v0_0_2(struct xsk_u + static enum xsk_prog get_xsk_prog(void) + { + enum xsk_prog detected = XSK_PROG_FALLBACK; +- struct bpf_load_program_attr prog_attr; + struct bpf_create_map_attr map_attr; + __u32 size_out, retval, duration; + char data_in 
= 0, data_out; +@@ -375,7 +374,7 @@ static enum xsk_prog get_xsk_prog(void) + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + BPF_EXIT_INSN(), + }; +- int prog_fd, map_fd, ret; ++ int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns); + + memset(&map_attr, 0, sizeof(map_attr)); + map_attr.map_type = BPF_MAP_TYPE_XSKMAP; +@@ -389,13 +388,7 @@ static enum xsk_prog get_xsk_prog(void) + + insns[0].imm = map_fd; + +- memset(&prog_attr, 0, sizeof(prog_attr)); +- prog_attr.prog_type = BPF_PROG_TYPE_XDP; +- prog_attr.insns = insns; +- prog_attr.insns_cnt = ARRAY_SIZE(insns); +- prog_attr.license = "GPL"; +- +- prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); ++ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); + if (prog_fd < 0) { + close(map_fd); + return detected; +@@ -495,10 +488,13 @@ static int xsk_load_xdp_prog(struct xsk_ + }; + struct bpf_insn *progs[] = {prog, prog_redirect_flags}; + enum xsk_prog option = get_xsk_prog(); ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, ++ .log_buf = log_buf, ++ .log_size = log_buf_size, ++ ); + +- prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, progs[option], insns_cnt[option], +- "LGPL-2.1 or BSD-2-Clause", 0, log_buf, +- log_buf_size); ++ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause", ++ progs[option], insns_cnt[option], &opts); + if (prog_fd < 0) { + pr_warn("BPF log buffer:\n%s", log_buf); + return prog_fd; +@@ -725,14 +721,12 @@ static int xsk_link_lookup(int ifindex, + + static bool xsk_probe_bpf_link(void) + { +- DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, +- .flags = XDP_FLAGS_SKB_MODE); +- struct bpf_load_program_attr prog_attr; ++ LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE); + struct bpf_insn insns[2] = { + BPF_MOV64_IMM(BPF_REG_0, XDP_PASS), + BPF_EXIT_INSN() + }; +- int prog_fd, link_fd = -1; ++ int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns); + int ifindex_lo = 1; + bool ret = false; + int err; +@@ -744,13 +738,7 @@ static bool xsk_probe_bpf_link(void) + if (link_fd >= 0) + return true; + +- memset(&prog_attr, 0, sizeof(prog_attr)); +- prog_attr.prog_type = BPF_PROG_TYPE_XDP; +- prog_attr.insns = insns; +- prog_attr.insns_cnt = ARRAY_SIZE(insns); +- prog_attr.license = "GPL"; +- +- prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); ++ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); + if (prog_fd < 0) + return ret; + diff --git a/patches.suse/libbpf-Rename-DECLARE_LIBBPF_OPTS-into-LIBBPF_OPTS.patch b/patches.suse/libbpf-Rename-DECLARE_LIBBPF_OPTS-into-LIBBPF_OPTS.patch new file mode 100644 index 0000000..127dbc7 --- /dev/null +++ b/patches.suse/libbpf-Rename-DECLARE_LIBBPF_OPTS-into-LIBBPF_OPTS.patch @@ -0,0 +1,61 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 15:08:34 -0700 +Subject: libbpf: Rename DECLARE_LIBBPF_OPTS into LIBBPF_OPTS +Patch-mainline: v5.17-rc1 +Git-commit: be80e9cdbca8ac66d09e0e24e0bd41d992362a0b +References: jsc#PED-1368 + +It's confusing that libbpf-provided helper macro doesn't start with +LIBBPF. Also "declare" vs "define" is confusing terminology, I can never +remember and always have to look up previous examples. + +Bypass both issues by renaming DECLARE_LIBBPF_OPTS into a short and +clean LIBBPF_OPTS. To avoid breaking existing code, provide: + + #define DECLARE_LIBBPF_OPTS LIBBPF_OPTS + +in libbpf_legacy.h. We can decide later if we ever want to remove it or +we'll keep it forever because it doesn't add any maintainability burden. 
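+
+As a hedged illustration (not part of the upstream change; the
+bpf_link_create_opts usage is only an example), both spellings now
+expand to the same declaration, so existing callers keep compiling:
+
+  /* new, shorter spelling */
+  LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE);
+
+  /* legacy spelling, kept working via the alias in libbpf_legacy.h */
+  DECLARE_LIBBPF_OPTS(bpf_link_create_opts, legacy_opts,
+		      .flags = XDP_FLAGS_SKB_MODE);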
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Dave Marchevsky +Link: https://lore.kernel.org/bpf/20211103220845.2676888-2-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf.h | 1 + + tools/lib/bpf/libbpf_common.h | 2 +- + tools/lib/bpf/libbpf_legacy.h | 1 + + 3 files changed, 3 insertions(+), 1 deletion(-) + +--- a/tools/lib/bpf/bpf.h ++++ b/tools/lib/bpf/bpf.h +@@ -29,6 +29,7 @@ + #include + + #include "libbpf_common.h" ++#include "libbpf_legacy.h" + + #ifdef __cplusplus + extern "C" { +--- a/tools/lib/bpf/libbpf_common.h ++++ b/tools/lib/bpf/libbpf_common.h +@@ -54,7 +54,7 @@ + * including any extra padding, it with memset() and then assigns initial + * values provided by users in struct initializer-syntax as varargs. + */ +-#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ ++#define LIBBPF_OPTS(TYPE, NAME, ...) \ + struct TYPE NAME = ({ \ + memset(&NAME, 0, sizeof(struct TYPE)); \ + (struct TYPE) { \ +--- a/tools/lib/bpf/libbpf_legacy.h ++++ b/tools/lib/bpf/libbpf_legacy.h +@@ -69,6 +69,7 @@ enum libbpf_strict_mode { + + LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode); + ++#define DECLARE_LIBBPF_OPTS LIBBPF_OPTS + + #ifdef __cplusplus + } /* extern "C" */ diff --git a/patches.suse/libbpf-Rename-libbpf-documentation-index-file.patch b/patches.suse/libbpf-Rename-libbpf-documentation-index-file.patch new file mode 100644 index 0000000..7de86ee --- /dev/null +++ b/patches.suse/libbpf-Rename-libbpf-documentation-index-file.patch @@ -0,0 +1,79 @@ +From: Grant Seltzer +Date: Wed, 18 Aug 2021 11:13:13 -0400 +Subject: libbpf: Rename libbpf documentation index file +Patch-mainline: v5.15-rc1 +Git-commit: d20b41115ad53293201cc07ee429a38740cb056b +References: jsc#PED-1368 + +This patch renames a documentation libbpf.rst to index.rst. In order +for readthedocs.org to pick this file up and properly build the +documentation site. + +It also changes the title type of the ABI subsection in the +naming convention doc. This is so that readthedocs.org doesn't treat this +section as a separate document. + +Signed-off-by: Grant Seltzer +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20210818151313.49992-1-grantseltzer@gmail.com +Acked-by: Shung-Hsi Yu +--- + Documentation/bpf/libbpf/index.rst | 22 ++++++++++++++++++ + Documentation/bpf/libbpf/libbpf.rst | 14 ----------- + Documentation/bpf/libbpf/libbpf_naming_convention.rst | 2 - + 3 files changed, 23 insertions(+), 15 deletions(-) + rename Documentation/bpf/libbpf/{libbpf.rst => index.rst} (75%) + +--- /dev/null ++++ b/Documentation/bpf/libbpf/index.rst +@@ -0,0 +1,22 @@ ++.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++ ++libbpf ++====== ++ ++For API documentation see the `versioned API documentation site `_. ++ ++.. toctree:: ++ :maxdepth: 1 ++ ++ libbpf_naming_convention ++ libbpf_build ++ ++This is documentation for libbpf, a userspace library for loading and ++interacting with bpf programs. ++ ++All general BPF questions, including kernel functionality, libbpf APIs and ++their application, should be sent to bpf@vger.kernel.org mailing list. ++You can `subscribe `_ to the ++mailing list search its `archive `_. ++Please search the archive before asking new questions. It very well might ++be that this was already addressed or answered before. +--- a/Documentation/bpf/libbpf/libbpf.rst ++++ /dev/null +@@ -1,14 +0,0 @@ +-.. 
SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +- +-libbpf +-====== +- +-This is documentation for libbpf, a userspace library for loading and +-interacting with bpf programs. +- +-All general BPF questions, including kernel functionality, libbpf APIs and +-their application, should be sent to bpf@vger.kernel.org mailing list. +-You can `subscribe `_ to the +-mailing list search its `archive `_. +-Please search the archive before asking new questions. It very well might +-be that this was already addressed or answered before. +--- a/Documentation/bpf/libbpf/libbpf_naming_convention.rst ++++ b/Documentation/bpf/libbpf/libbpf_naming_convention.rst +@@ -69,7 +69,7 @@ functions. These can be mixed and matche + are not reentrant for performance reasons. + + ABI +-========== ++--- + + libbpf can be both linked statically or used as DSO. To avoid possible + conflicts with other libraries an application is linked with, all diff --git a/patches.suse/libbpf-Replace-btf__type_by_id-with-btf_type_by_id.patch b/patches.suse/libbpf-Replace-btf__type_by_id-with-btf_type_by_id.patch new file mode 100644 index 0000000..65a9906 --- /dev/null +++ b/patches.suse/libbpf-Replace-btf__type_by_id-with-btf_type_by_id.patch @@ -0,0 +1,121 @@ +From: Alexei Starovoitov +Date: Wed, 1 Dec 2021 10:10:24 -0800 +Subject: libbpf: Replace btf__type_by_id() with btf_type_by_id(). +Patch-mainline: v5.17-rc1 +Git-commit: 74753e1462e77349525daf9eb60ea21ed92d3a97 +References: jsc#PED-1368 + +To prepare relo_core.c to be compiled in the kernel and the user space +replace btf__type_by_id with btf_type_by_id. + +In libbpf btf__type_by_id and btf_type_by_id have different behavior. + +bpf_core_apply_relo_insn() needs behavior of uapi btf__type_by_id +vs internal btf_type_by_id, but type_id range check is already done +in bpf_core_apply_relo(), so it's safe to replace it everywhere. +The kernel btf_type_by_id() does the check anyway. It doesn't hurt. 
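+
+As a hedged sketch (not from this patch), the practical difference
+between the two accessors is constness and where the range check lives:
+
+  /* public API: validates type_id, returns NULL when out of range */
+  const struct btf_type *t = btf__type_by_id(btf, type_id);
+
+  /* internal helper: non-const result, type_id assumed already valid */
+  struct btf_type *mt = btf_type_by_id(btf, type_id);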
+ +Suggested-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-2-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/btf.c | 2 +- + tools/lib/bpf/libbpf_internal.h | 2 +- + tools/lib/bpf/relo_core.c | 19 ++++++++----------- + 3 files changed, 10 insertions(+), 13 deletions(-) + +--- a/tools/lib/bpf/btf.c ++++ b/tools/lib/bpf/btf.c +@@ -454,7 +454,7 @@ const struct btf *btf__base_btf(const st + } + + /* internal helper returning non-const pointer to a type */ +-struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id) ++struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id) + { + if (type_id == 0) + return &btf_void; +--- a/tools/lib/bpf/libbpf_internal.h ++++ b/tools/lib/bpf/libbpf_internal.h +@@ -172,7 +172,7 @@ static inline void *libbpf_reallocarray( + struct btf; + struct btf_type; + +-struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id); ++struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id); + const char *btf_kind_str(const struct btf_type *t); + const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); + +--- a/tools/lib/bpf/relo_core.c ++++ b/tools/lib/bpf/relo_core.c +@@ -51,7 +51,7 @@ static bool is_flex_arr(const struct btf + return false; + + /* has to be the last member of enclosing struct */ +- t = btf__type_by_id(btf, acc->type_id); ++ t = btf_type_by_id(btf, acc->type_id); + return acc->idx == btf_vlen(t) - 1; + } + +@@ -388,7 +388,7 @@ static int bpf_core_match_member(const s + return 0; + + local_id = local_acc->type_id; +- local_type = btf__type_by_id(local_btf, local_id); ++ local_type = btf_type_by_id(local_btf, local_id); + local_member = btf_members(local_type) + local_acc->idx; + local_name = btf__name_by_offset(local_btf, local_member->name_off); + +@@ -580,7 +580,7 @@ static int bpf_core_calc_field_relo(cons + return -EUCLEAN; /* request instruction poisoning */ + + acc = &spec->spec[spec->len - 1]; +- t = btf__type_by_id(spec->btf, acc->type_id); ++ t = btf_type_by_id(spec->btf, acc->type_id); + + /* a[n] accessor needs special handling */ + if (!acc->name) { +@@ -729,7 +729,7 @@ static int bpf_core_calc_enumval_relo(co + case BPF_ENUMVAL_VALUE: + if (!spec) + return -EUCLEAN; /* request instruction poisoning */ +- t = btf__type_by_id(spec->btf, spec->spec[0].type_id); ++ t = btf_type_by_id(spec->btf, spec->spec[0].type_id); + e = btf_enum(t) + spec->spec[0].idx; + *val = e->val; + break; +@@ -805,8 +805,8 @@ static int bpf_core_calc_relo(const char + if (res->orig_sz != res->new_sz) { + const struct btf_type *orig_t, *new_t; + +- orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); +- new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); ++ orig_t = btf_type_by_id(local_spec->btf, res->orig_type_id); ++ new_t = btf_type_by_id(targ_spec->btf, res->new_type_id); + + /* There are two use cases in which it's safe to + * adjust load/store's mem size: +@@ -1054,7 +1054,7 @@ static void bpf_core_dump_spec(int level + int i; + + type_id = spec->root_type_id; +- t = btf__type_by_id(spec->btf, type_id); ++ t = btf_type_by_id(spec->btf, type_id); + s = btf__name_by_offset(spec->btf, t->name_off); + + libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? 
"" : s); +@@ -1158,10 +1158,7 @@ int bpf_core_apply_relo_insn(const char + int i, j, err; + + local_id = relo->type_id; +- local_type = btf__type_by_id(local_btf, local_id); +- if (!local_type) +- return -EINVAL; +- ++ local_type = btf_type_by_id(local_btf, local_id); + local_name = btf__name_by_offset(local_btf, local_type->name_off); + if (!local_name) + return -EINVAL; diff --git a/patches.suse/libbpf-Rework-feature-probing-APIs.patch b/patches.suse/libbpf-Rework-feature-probing-APIs.patch new file mode 100644 index 0000000..c4598cd --- /dev/null +++ b/patches.suse/libbpf-Rework-feature-probing-APIs.patch @@ -0,0 +1,485 @@ +From: Andrii Nakryiko +Date: Fri, 17 Dec 2021 09:12:00 -0800 +Subject: libbpf: Rework feature-probing APIs +Patch-mainline: v5.17-rc1 +Git-commit: 878d8def0603eebf11e19903e7a8886b3e9728e4 +References: jsc#PED-1368 + +Create three extensible alternatives to inconsistently named +feature-probing APIs: + + - libbpf_probe_bpf_prog_type() instead of bpf_probe_prog_type(); + - libbpf_probe_bpf_map_type() instead of bpf_probe_map_type(); + - libbpf_probe_bpf_helper() instead of bpf_probe_helper(). + +Set up return values such that libbpf can report errors (e.g., if some +combination of input arguments isn't possible to validate, etc), in +addition to whether the feature is supported (return value 1) or not +supported (return value 0). + +Also schedule deprecation of those three APIs. Also schedule deprecation +of bpf_probe_large_insn_limit(). + +Also fix all the existing detection logic for various program and map +types that never worked: + + - BPF_PROG_TYPE_LIRC_MODE2; + - BPF_PROG_TYPE_TRACING; + - BPF_PROG_TYPE_LSM; + - BPF_PROG_TYPE_EXT; + - BPF_PROG_TYPE_SYSCALL; + - BPF_PROG_TYPE_STRUCT_OPS; + - BPF_MAP_TYPE_STRUCT_OPS; + - BPF_MAP_TYPE_BLOOM_FILTER. + +Above prog/map types needed special setups and detection logic to work. +Subsequent patch adds selftests that will make sure that all the +detection logic keeps working for all current and future program and map +types, avoiding otherwise inevitable bit rot. + + [0] Closes: https://github.com/libbpf/libbpf/issues/312 + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Acked-by: Dave Marchevsky +Cc: Julia Kartseva +Link: https://lore.kernel.org/bpf/20211217171202.3352835-2-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.h | 52 ++++++++- + tools/lib/bpf/libbpf.map | 3 + tools/lib/bpf/libbpf_probes.c | 235 +++++++++++++++++++++++++++++++++--------- + 3 files changed, 236 insertions(+), 54 deletions(-) + +--- a/tools/lib/bpf/libbpf.h ++++ b/tools/lib/bpf/libbpf.h +@@ -1002,13 +1002,57 @@ bpf_prog_linfo__lfind(const struct bpf_p + * user, causing subsequent probes to fail. In this case, the caller may want + * to adjust that limit with setrlimit(). 
+ */ +-LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, +- __u32 ifindex); ++LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_prog_type() instead") ++LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex); ++LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_map_type() instead") + LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); +-LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, +- enum bpf_prog_type prog_type, __u32 ifindex); ++LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_helper() instead") ++LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex); ++LIBBPF_DEPRECATED_SINCE(0, 8, "implement your own or use bpftool for feature detection") + LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex); + ++/** ++ * @brief **libbpf_probe_bpf_prog_type()** detects if host kernel supports ++ * BPF programs of a given type. ++ * @param prog_type BPF program type to detect kernel support for ++ * @param opts reserved for future extensibility, should be NULL ++ * @return 1, if given program type is supported; 0, if given program type is ++ * not supported; negative error code if feature detection failed or can't be ++ * performed ++ * ++ * Make sure the process has required set of CAP_* permissions (or runs as ++ * root) when performing feature checking. ++ */ ++LIBBPF_API int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts); ++/** ++ * @brief **libbpf_probe_bpf_map_type()** detects if host kernel supports ++ * BPF maps of a given type. ++ * @param map_type BPF map type to detect kernel support for ++ * @param opts reserved for future extensibility, should be NULL ++ * @return 1, if given map type is supported; 0, if given map type is ++ * not supported; negative error code if feature detection failed or can't be ++ * performed ++ * ++ * Make sure the process has required set of CAP_* permissions (or runs as ++ * root) when performing feature checking. ++ */ ++LIBBPF_API int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts); ++/** ++ * @brief **libbpf_probe_bpf_helper()** detects if host kernel supports the ++ * use of a given BPF helper from specified BPF program type. ++ * @param prog_type BPF program type used to check the support of BPF helper ++ * @param helper_id BPF helper ID (enum bpf_func_id) to check support for ++ * @param opts reserved for future extensibility, should be NULL ++ * @return 1, if given combination of program type and helper is supported; 0, ++ * if the combination is not supported; negative error code if feature ++ * detection for provided input arguments failed or can't be performed ++ * ++ * Make sure the process has required set of CAP_* permissions (or runs as ++ * root) when performing feature checking. 
++ */ ++LIBBPF_API int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, ++ enum bpf_func_id helper_id, const void *opts); ++ + /* + * Get bpf_prog_info in continuous memory + * +--- a/tools/lib/bpf/libbpf.map ++++ b/tools/lib/bpf/libbpf.map +@@ -427,5 +427,8 @@ LIBBPF_0.7.0 { + bpf_program__log_level; + bpf_program__set_log_buf; + bpf_program__set_log_level; ++ libbpf_probe_bpf_helper; ++ libbpf_probe_bpf_map_type; ++ libbpf_probe_bpf_prog_type; + libbpf_set_memlock_rlim_max; + }; +--- a/tools/lib/bpf/libbpf_probes.c ++++ b/tools/lib/bpf/libbpf_probes.c +@@ -64,12 +64,20 @@ static int get_kernel_version(void) + return (version << 16) + (subversion << 8) + patchlevel; + } + +-static void +-probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, +- size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex) ++static int probe_prog_load(enum bpf_prog_type prog_type, ++ const struct bpf_insn *insns, size_t insns_cnt, ++ char *log_buf, size_t log_buf_sz, ++ __u32 ifindex) + { +- LIBBPF_OPTS(bpf_prog_load_opts, opts); +- int fd; ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, ++ .log_buf = log_buf, ++ .log_size = log_buf_sz, ++ .log_level = log_buf ? 1 : 0, ++ .prog_ifindex = ifindex, ++ ); ++ int fd, err, exp_err = 0; ++ const char *exp_msg = NULL; ++ char buf[4096]; + + switch (prog_type) { + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: +@@ -84,6 +92,38 @@ probe_load(enum bpf_prog_type prog_type, + case BPF_PROG_TYPE_KPROBE: + opts.kern_version = get_kernel_version(); + break; ++ case BPF_PROG_TYPE_LIRC_MODE2: ++ opts.expected_attach_type = BPF_LIRC_MODE2; ++ break; ++ case BPF_PROG_TYPE_TRACING: ++ case BPF_PROG_TYPE_LSM: ++ opts.log_buf = buf; ++ opts.log_size = sizeof(buf); ++ opts.log_level = 1; ++ if (prog_type == BPF_PROG_TYPE_TRACING) ++ opts.expected_attach_type = BPF_TRACE_FENTRY; ++ else ++ opts.expected_attach_type = BPF_MODIFY_RETURN; ++ opts.attach_btf_id = 1; ++ ++ exp_err = -EINVAL; ++ exp_msg = "attach_btf_id 1 is not a function"; ++ break; ++ case BPF_PROG_TYPE_EXT: ++ opts.log_buf = buf; ++ opts.log_size = sizeof(buf); ++ opts.log_level = 1; ++ opts.attach_btf_id = 1; ++ ++ exp_err = -EINVAL; ++ exp_msg = "Cannot replace kernel functions"; ++ break; ++ case BPF_PROG_TYPE_SYSCALL: ++ opts.prog_flags = BPF_F_SLEEPABLE; ++ break; ++ case BPF_PROG_TYPE_STRUCT_OPS: ++ exp_err = -524; /* -ENOTSUPP */ ++ break; + case BPF_PROG_TYPE_UNSPEC: + case BPF_PROG_TYPE_SOCKET_FILTER: + case BPF_PROG_TYPE_SCHED_CLS: +@@ -103,25 +143,42 @@ probe_load(enum bpf_prog_type prog_type, + case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: + case BPF_PROG_TYPE_LWT_SEG6LOCAL: +- case BPF_PROG_TYPE_LIRC_MODE2: + case BPF_PROG_TYPE_SK_REUSEPORT: + case BPF_PROG_TYPE_FLOW_DISSECTOR: + case BPF_PROG_TYPE_CGROUP_SYSCTL: +- case BPF_PROG_TYPE_TRACING: +- case BPF_PROG_TYPE_STRUCT_OPS: +- case BPF_PROG_TYPE_EXT: +- case BPF_PROG_TYPE_LSM: +- default: + break; ++ default: ++ return -EOPNOTSUPP; + } + +- opts.prog_ifindex = ifindex; +- opts.log_buf = buf; +- opts.log_size = buf_len; +- +- fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, NULL); ++ fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, &opts); ++ err = -errno; + if (fd >= 0) + close(fd); ++ if (exp_err) { ++ if (fd >= 0 || err != exp_err) ++ return 0; ++ if (exp_msg && !strstr(buf, exp_msg)) ++ return 0; ++ return 1; ++ } ++ return fd >= 0 ? 
1 : 0; ++} ++ ++int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) ++{ ++ struct bpf_insn insns[] = { ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN() ++ }; ++ const size_t insn_cnt = ARRAY_SIZE(insns); ++ int ret; ++ ++ if (opts) ++ return libbpf_err(-EINVAL); ++ ++ ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0, 0); ++ return libbpf_err(ret); + } + + bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) +@@ -131,12 +188,16 @@ bool bpf_probe_prog_type(enum bpf_prog_t + BPF_EXIT_INSN() + }; + ++ /* prefer libbpf_probe_bpf_prog_type() unless offload is requested */ ++ if (ifindex == 0) ++ return libbpf_probe_bpf_prog_type(prog_type, NULL) == 1; ++ + if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS) + /* nfp returns -EINVAL on exit(0) with TC offload */ + insns[0].imm = 2; + + errno = 0; +- probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); ++ probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); + + return errno != EINVAL && errno != EOPNOTSUPP; + } +@@ -197,16 +258,18 @@ static int load_local_storage_btf(void) + strs, sizeof(strs)); + } + +-bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) ++static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex) + { +- int key_size, value_size, max_entries, map_flags; ++ LIBBPF_OPTS(bpf_map_create_opts, opts); ++ int key_size, value_size, max_entries; + __u32 btf_key_type_id = 0, btf_value_type_id = 0; +- int fd = -1, btf_fd = -1, fd_inner; ++ int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err; ++ ++ opts.map_ifindex = ifindex; + + key_size = sizeof(__u32); + value_size = sizeof(__u32); + max_entries = 1; +- map_flags = 0; + + switch (map_type) { + case BPF_MAP_TYPE_STACK_TRACE: +@@ -215,7 +278,7 @@ bool bpf_probe_map_type(enum bpf_map_typ + case BPF_MAP_TYPE_LPM_TRIE: + key_size = sizeof(__u64); + value_size = sizeof(__u64); +- map_flags = BPF_F_NO_PREALLOC; ++ opts.map_flags = BPF_F_NO_PREALLOC; + break; + case BPF_MAP_TYPE_CGROUP_STORAGE: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: +@@ -234,17 +297,25 @@ bool bpf_probe_map_type(enum bpf_map_typ + btf_value_type_id = 3; + value_size = 8; + max_entries = 0; +- map_flags = BPF_F_NO_PREALLOC; ++ opts.map_flags = BPF_F_NO_PREALLOC; + btf_fd = load_local_storage_btf(); + if (btf_fd < 0) +- return false; ++ return btf_fd; + break; + case BPF_MAP_TYPE_RINGBUF: + key_size = 0; + value_size = 0; + max_entries = 4096; + break; +- case BPF_MAP_TYPE_UNSPEC: ++ case BPF_MAP_TYPE_STRUCT_OPS: ++ /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ ++ opts.btf_vmlinux_value_type_id = 1; ++ exp_err = -524; /* -ENOTSUPP */ ++ break; ++ case BPF_MAP_TYPE_BLOOM_FILTER: ++ key_size = 0; ++ max_entries = 1; ++ break; + case BPF_MAP_TYPE_HASH: + case BPF_MAP_TYPE_ARRAY: + case BPF_MAP_TYPE_PROG_ARRAY: +@@ -263,49 +334,114 @@ bool bpf_probe_map_type(enum bpf_map_typ + case BPF_MAP_TYPE_XSKMAP: + case BPF_MAP_TYPE_SOCKHASH: + case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: +- case BPF_MAP_TYPE_STRUCT_OPS: +- default: + break; ++ case BPF_MAP_TYPE_UNSPEC: ++ default: ++ return -EOPNOTSUPP; + } + + if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { +- LIBBPF_OPTS(bpf_map_create_opts, opts); +- + /* TODO: probe for device, once libbpf has a function to create + * map-in-map for offload + */ + if (ifindex) +- return false; ++ goto cleanup; + + fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, + sizeof(__u32), sizeof(__u32), 1, NULL); + if (fd_inner < 0) +- 
return false; ++ goto cleanup; + + opts.inner_map_fd = fd_inner; +- fd = bpf_map_create(map_type, NULL, sizeof(__u32), sizeof(__u32), 1, &opts); +- close(fd_inner); +- } else { +- LIBBPF_OPTS(bpf_map_create_opts, opts); +- +- /* Note: No other restriction on map type probes for offload */ +- opts.map_flags = map_flags; +- opts.map_ifindex = ifindex; +- if (btf_fd >= 0) { +- opts.btf_fd = btf_fd; +- opts.btf_key_type_id = btf_key_type_id; +- opts.btf_value_type_id = btf_value_type_id; +- } ++ } + +- fd = bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts); ++ if (btf_fd >= 0) { ++ opts.btf_fd = btf_fd; ++ opts.btf_key_type_id = btf_key_type_id; ++ opts.btf_value_type_id = btf_value_type_id; + } ++ ++ fd = bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts); ++ err = -errno; ++ ++cleanup: + if (fd >= 0) + close(fd); ++ if (fd_inner >= 0) ++ close(fd_inner); + if (btf_fd >= 0) + close(btf_fd); + +- return fd >= 0; ++ if (exp_err) ++ return fd < 0 && err == exp_err ? 1 : 0; ++ else ++ return fd >= 0 ? 1 : 0; ++} ++ ++int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts) ++{ ++ int ret; ++ ++ if (opts) ++ return libbpf_err(-EINVAL); ++ ++ ret = probe_map_create(map_type, 0); ++ return libbpf_err(ret); ++} ++ ++bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) ++{ ++ return probe_map_create(map_type, ifindex) == 1; ++} ++ ++int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id, ++ const void *opts) ++{ ++ struct bpf_insn insns[] = { ++ BPF_EMIT_CALL((__u32)helper_id), ++ BPF_EXIT_INSN(), ++ }; ++ const size_t insn_cnt = ARRAY_SIZE(insns); ++ char buf[4096]; ++ int ret; ++ ++ if (opts) ++ return libbpf_err(-EINVAL); ++ ++ /* we can't successfully load all prog types to check for BPF helper ++ * support, so bail out with -EOPNOTSUPP error ++ */ ++ switch (prog_type) { ++ case BPF_PROG_TYPE_TRACING: ++ case BPF_PROG_TYPE_EXT: ++ case BPF_PROG_TYPE_LSM: ++ case BPF_PROG_TYPE_STRUCT_OPS: ++ return -EOPNOTSUPP; ++ default: ++ break; ++ } ++ ++ buf[0] = '\0'; ++ ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf), 0); ++ if (ret < 0) ++ return libbpf_err(ret); ++ ++ /* If BPF verifier doesn't recognize BPF helper ID (enum bpf_func_id) ++ * at all, it will emit something like "invalid func unknown#181". ++ * If BPF verifier recognizes BPF helper but it's not supported for ++ * given BPF program type, it will emit "unknown func bpf_sys_bpf#166". ++ * In both cases, provided combination of BPF program type and BPF ++ * helper is not supported by the kernel. ++ * In all other cases, probe_prog_load() above will either succeed (e.g., ++ * because BPF helper happens to accept no input arguments or it ++ * accepts one input argument and initial PTR_TO_CTX is fine for ++ * that), or we'll get some more specific BPF verifier error about ++ * some unsatisfied conditions. 
++ */ ++ if (ret == 0 && (strstr(buf, "invalid func ") || strstr(buf, "unknown func "))) ++ return 0; ++ return 1; /* assume supported */ + } + + bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, +@@ -318,8 +454,7 @@ bool bpf_probe_helper(enum bpf_func_id i + char buf[4096] = {}; + bool res; + +- probe_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), +- ifindex); ++ probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), ifindex); + res = !grep(buf, "invalid func ") && !grep(buf, "unknown func "); + + if (ifindex) { +@@ -351,8 +486,8 @@ bool bpf_probe_large_insn_limit(__u32 if + insns[BPF_MAXINSNS] = BPF_EXIT_INSN(); + + errno = 0; +- probe_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0, +- ifindex); ++ probe_prog_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0, ++ ifindex); + + return errno != E2BIG && errno != EINVAL; + } diff --git a/patches.suse/libbpf-Silence-uninitialized-warning-error-in-btf_du.patch b/patches.suse/libbpf-Silence-uninitialized-warning-error-in-btf_du.patch new file mode 100644 index 0000000..b8f1448 --- /dev/null +++ b/patches.suse/libbpf-Silence-uninitialized-warning-error-in-btf_du.patch @@ -0,0 +1,47 @@ +From: Alan Maguire +Date: Mon, 29 Nov 2021 10:00:40 +0000 +Subject: libbpf: Silence uninitialized warning/error in + btf_dump_dump_type_data +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 +Git-commit: 43174f0d4597325cb91f1f1f55263eb6e6101036 +References: jsc#PED-1368 + +When compiling libbpf with gcc 4.8.5, we see: + + CC staticobjs/btf_dump.o +btf_dump.c: In function ‘btf_dump_dump_type_data.isra.24’: +btf_dump.c:2296:5: error: ‘err’ may be used uninitialized in this function [-Werror=maybe-uninitialized] + if (err < 0) + ^ +cc1: all warnings being treated as errors +make: *** [staticobjs/btf_dump.o] Error 1 + +While gcc 4.8.5 is too old to build the upstream kernel, it's possible it +could be used to build standalone libbpf which suffers from the same problem. +Silence the error by initializing 'err' to 0. The warning/error seems to be +a false positive since err is set early in the function. Regardless we +shouldn't prevent libbpf from building for this. + +Fixes: 920d16af9b42 ("libbpf: BTF dumper support for typed data") +Signed-off-by: Alan Maguire +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/1638180040-8037-1-git-send-email-alan.maguire@oracle.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/btf_dump.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/lib/bpf/btf_dump.c ++++ b/tools/lib/bpf/btf_dump.c +@@ -2216,7 +2216,7 @@ static int btf_dump_dump_type_data(struc + __u8 bits_offset, + __u8 bit_sz) + { +- int size, err; ++ int size, err = 0; + + size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset); + if (size < 0) diff --git a/patches.suse/libbpf-Stop-using-to-be-deprecated-APIs.patch b/patches.suse/libbpf-Stop-using-to-be-deprecated-APIs.patch new file mode 100644 index 0000000..142b5f0 --- /dev/null +++ b/patches.suse/libbpf-Stop-using-to-be-deprecated-APIs.patch @@ -0,0 +1,63 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 15:08:38 -0700 +Subject: libbpf: Stop using to-be-deprecated APIs +Patch-mainline: v5.17-rc1 +Git-commit: bcc40fc0021d4b7c016f8bcf62bd4e21251fdee8 +References: jsc#PED-1368 + +Remove all the internal uses of libbpf APIs that are slated to be +deprecated in v0.7. 
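+
+For example (a hedged sketch; loop bodies elided), the iteration
+helpers below move from the to-be-deprecated accessors to their
+object-scoped replacements:
+
+  /* before */
+  while ((map = bpf_map__prev(map, obj))) { /* ... */ }
+
+  /* after */
+  while ((map = bpf_object__prev_map(obj, map))) { /* ... */ }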
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211103220845.2676888-6-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -7707,7 +7707,7 @@ int bpf_object__pin_maps(struct bpf_obje + return 0; + + err_unpin_maps: +- while ((map = bpf_map__prev(map, obj))) { ++ while ((map = bpf_object__prev_map(obj, map))) { + if (!map->pin_path) + continue; + +@@ -7787,7 +7787,7 @@ int bpf_object__pin_programs(struct bpf_ + return 0; + + err_unpin_programs: +- while ((prog = bpf_program__prev(prog, obj))) { ++ while ((prog = bpf_object__prev_program(obj, prog))) { + char buf[PATH_MAX]; + int len; + +@@ -8128,9 +8128,11 @@ int bpf_program__set_autoload(struct bpf + return 0; + } + ++static int bpf_program_nth_fd(const struct bpf_program *prog, int n); ++ + int bpf_program__fd(const struct bpf_program *prog) + { +- return bpf_program__nth_fd(prog, 0); ++ return bpf_program_nth_fd(prog, 0); + } + + size_t bpf_program__size(const struct bpf_program *prog) +@@ -8176,7 +8178,10 @@ int bpf_program__set_prep(struct bpf_pro + return 0; + } + +-int bpf_program__nth_fd(const struct bpf_program *prog, int n) ++__attribute__((alias("bpf_program_nth_fd"))) ++int bpf_program__nth_fd(const struct bpf_program *prog, int n); ++ ++static int bpf_program_nth_fd(const struct bpf_program *prog, int n) + { + int fd; + diff --git a/patches.suse/libbpf-Support-BTF_KIND_TYPE_TAG.patch b/patches.suse/libbpf-Support-BTF_KIND_TYPE_TAG.patch new file mode 100644 index 0000000..eda6dff --- /dev/null +++ b/patches.suse/libbpf-Support-BTF_KIND_TYPE_TAG.patch @@ -0,0 +1,286 @@ +From: Yonghong Song +Date: Thu, 11 Nov 2021 17:26:14 -0800 +Subject: libbpf: Support BTF_KIND_TYPE_TAG +Patch-mainline: v5.17-rc1 +Git-commit: 2dc1e488e5cdfd937554ca81fd46ad874d244b3f +References: jsc#PED-1368 + +Add libbpf support for BTF_KIND_TYPE_TAG. 
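+
+As a hedged example (identifiers are illustrative, not taken from this
+patch): a type tag annotates the pointee type, which the dumper change
+below renders as
+
+  int __attribute__((btf_type_tag("user"))) *p;
+
+and which can be constructed with the new API, ptr_target_id being a
+previously added type ID:
+
+  int tag_id = btf__add_type_tag(btf, "user", ptr_target_id);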
+ +Signed-off-by: Yonghong Song +Signed-off-by: Alexei Starovoitov +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211112012614.1505315-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/btf.c | 23 +++++++++++++++++++++++ + tools/lib/bpf/btf.h | 9 ++++++++- + tools/lib/bpf/btf_dump.c | 9 +++++++++ + tools/lib/bpf/libbpf.c | 31 ++++++++++++++++++++++++++++++- + tools/lib/bpf/libbpf.map | 1 + + tools/lib/bpf/libbpf_internal.h | 2 ++ + 6 files changed, 73 insertions(+), 2 deletions(-) + +--- a/tools/lib/bpf/btf.c ++++ b/tools/lib/bpf/btf.c +@@ -299,6 +299,7 @@ static int btf_type_size(const struct bt + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_FLOAT: ++ case BTF_KIND_TYPE_TAG: + return base_size; + case BTF_KIND_INT: + return base_size + sizeof(__u32); +@@ -349,6 +350,7 @@ static int btf_bswap_type_rest(struct bt + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_FLOAT: ++ case BTF_KIND_TYPE_TAG: + return 0; + case BTF_KIND_INT: + *(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1)); +@@ -649,6 +651,7 @@ int btf__align_of(const struct btf *btf, + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: ++ case BTF_KIND_TYPE_TAG: + return btf__align_of(btf, t->type); + case BTF_KIND_ARRAY: + return btf__align_of(btf, btf_array(t)->type); +@@ -2236,6 +2239,22 @@ int btf__add_restrict(struct btf *btf, i + } + + /* ++ * Append new BTF_KIND_TYPE_TAG type with: ++ * - *value*, non-empty/non-NULL tag value; ++ * - *ref_type_id* - referenced type ID, it might not exist yet; ++ * Returns: ++ * - >0, type ID of newly added BTF type; ++ * - <0, on error. ++ */ ++int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id) ++{ ++ if (!value|| !value[0]) ++ return libbpf_err(-EINVAL); ++ ++ return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id); ++} ++ ++/* + * Append new BTF_KIND_FUNC type with: + * - *name*, non-empty/non-NULL name; + * - *proto_type_id* - FUNC_PROTO's type ID, it might not exist yet; +@@ -3639,6 +3658,7 @@ static int btf_dedup_prep(struct btf_ded + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_FLOAT: ++ case BTF_KIND_TYPE_TAG: + h = btf_hash_common(t); + break; + case BTF_KIND_INT: +@@ -3699,6 +3719,7 @@ static int btf_dedup_prim_type(struct bt + case BTF_KIND_VAR: + case BTF_KIND_DATASEC: + case BTF_KIND_DECL_TAG: ++ case BTF_KIND_TYPE_TAG: + return 0; + + case BTF_KIND_INT: +@@ -4297,6 +4318,7 @@ static int btf_dedup_ref_type(struct btf + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: ++ case BTF_KIND_TYPE_TAG: + ref_type_id = btf_dedup_ref_type(d, t->type); + if (ref_type_id < 0) + return ref_type_id; +@@ -4603,6 +4625,7 @@ int btf_type_visit_type_ids(struct btf_t + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: ++ case BTF_KIND_TYPE_TAG: + return visit(&t->type, ctx); + + case BTF_KIND_ARRAY: { +--- a/tools/lib/bpf/btf.h ++++ b/tools/lib/bpf/btf.h +@@ -227,6 +227,7 @@ LIBBPF_API int btf__add_typedef(struct b + LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id); + LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id); + LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id); ++LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id); + + /* func and func_proto construction APIs */ + LIBBPF_API int btf__add_func(struct btf *btf, const char *name, +@@ -458,7 +459,8 @@ static inline bool btf_is_mod(const stru + + return kind == BTF_KIND_VOLATILE || + kind == BTF_KIND_CONST || 
+- kind == BTF_KIND_RESTRICT; ++ kind == BTF_KIND_RESTRICT || ++ kind == BTF_KIND_TYPE_TAG; + } + + static inline bool btf_is_func(const struct btf_type *t) +@@ -491,6 +493,11 @@ static inline bool btf_is_decl_tag(const + return btf_kind(t) == BTF_KIND_DECL_TAG; + } + ++static inline bool btf_is_type_tag(const struct btf_type *t) ++{ ++ return btf_kind(t) == BTF_KIND_TYPE_TAG; ++} ++ + static inline __u8 btf_int_encoding(const struct btf_type *t) + { + return BTF_INT_ENCODING(*(__u32 *)(t + 1)); +--- a/tools/lib/bpf/btf_dump.c ++++ b/tools/lib/bpf/btf_dump.c +@@ -330,6 +330,7 @@ static int btf_dump_mark_referenced(stru + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: ++ case BTF_KIND_TYPE_TAG: + d->type_states[t->type].referenced = 1; + break; + +@@ -573,6 +574,7 @@ static int btf_dump_order_type(struct bt + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: ++ case BTF_KIND_TYPE_TAG: + return btf_dump_order_type(d, t->type, through_ptr); + + case BTF_KIND_FUNC_PROTO: { +@@ -747,6 +749,7 @@ static void btf_dump_emit_type(struct bt + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: ++ case BTF_KIND_TYPE_TAG: + btf_dump_emit_type(d, t->type, cont_id); + break; + case BTF_KIND_ARRAY: +@@ -1167,6 +1170,7 @@ skip_mod: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_FUNC_PROTO: ++ case BTF_KIND_TYPE_TAG: + id = t->type; + break; + case BTF_KIND_ARRAY: +@@ -1335,6 +1339,11 @@ static void btf_dump_emit_type_chain(str + case BTF_KIND_RESTRICT: + btf_dump_printf(d, " restrict"); + break; ++ case BTF_KIND_TYPE_TAG: ++ btf_dump_emit_mods(d, decls); ++ name = btf_name_of(d, t->name_off); ++ btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name); ++ break; + case BTF_KIND_ARRAY: { + const struct btf_array *a = btf_array(t); + const struct btf_type *next_t; +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -197,6 +197,8 @@ enum kern_feature_id { + FEAT_PERF_LINK, + /* BTF_KIND_DECL_TAG support */ + FEAT_BTF_DECL_TAG, ++ /* BTF_KIND_TYPE_TAG support */ ++ FEAT_BTF_TYPE_TAG, + __FEAT_CNT, + }; + +@@ -2076,6 +2078,7 @@ static const char *__btf_kind_str(__u16 + case BTF_KIND_DATASEC: return "datasec"; + case BTF_KIND_FLOAT: return "float"; + case BTF_KIND_DECL_TAG: return "decl_tag"; ++ case BTF_KIND_TYPE_TAG: return "type_tag"; + default: return "unknown"; + } + } +@@ -2588,8 +2591,10 @@ static bool btf_needs_sanitization(struc + bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); + bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); + bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); ++ bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); + +- return !has_func || !has_datasec || !has_func_global || !has_float || !has_decl_tag; ++ return !has_func || !has_datasec || !has_func_global || !has_float || ++ !has_decl_tag || !has_type_tag; + } + + static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) +@@ -2599,6 +2604,7 @@ static void bpf_object__sanitize_btf(str + bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); + bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); + bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); ++ bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); + struct btf_type *t; + int i, j, vlen; + +@@ -2657,6 +2663,10 @@ static void bpf_object__sanitize_btf(str + */ + t->name_off = 0; + t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0); ++ } else if (!has_type_tag && btf_is_type_tag(t)) { ++ /* replace TYPE_TAG with a 
CONST */ ++ t->name_off = 0; ++ t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0); + } + } + } +@@ -4460,6 +4470,22 @@ static int probe_kern_btf_decl_tag(void) + strs, sizeof(strs))); + } + ++static int probe_kern_btf_type_tag(void) ++{ ++ static const char strs[] = "\0tag"; ++ __u32 types[] = { ++ /* int */ ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ /* attr */ ++ BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ ++ /* ptr */ ++ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ ++ }; ++ ++ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), ++ strs, sizeof(strs))); ++} ++ + static int probe_kern_array_mmap(void) + { + struct bpf_create_map_attr attr = { +@@ -4657,6 +4683,9 @@ static struct kern_feature_desc { + [FEAT_BTF_DECL_TAG] = { + "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, + }, ++ [FEAT_BTF_TYPE_TAG] = { ++ "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, ++ }, + }; + + static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) +--- a/tools/lib/bpf/libbpf.map ++++ b/tools/lib/bpf/libbpf.map +@@ -403,6 +403,7 @@ LIBBPF_0.6.0 { + bpf_program__set_extra_flags; + btf__add_btf; + btf__add_decl_tag; ++ btf__add_type_tag; + btf__dedup; + btf__dedup_deprecated; + btf__raw_data; +--- a/tools/lib/bpf/libbpf_internal.h ++++ b/tools/lib/bpf/libbpf_internal.h +@@ -73,6 +73,8 @@ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) + #define BTF_TYPE_DECL_TAG_ENC(value, type, component_idx) \ + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) ++#define BTF_TYPE_TYPE_TAG_ENC(value, type) \ ++ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type) + + #ifndef likely + #define likely(x) __builtin_expect(!!(x), 1) diff --git a/patches.suse/libbpf-Support-init-of-inner-maps-in-light-skeleton.patch b/patches.suse/libbpf-Support-init-of-inner-maps-in-light-skeleton.patch new file mode 100644 index 0000000..300ccdf --- /dev/null +++ b/patches.suse/libbpf-Support-init-of-inner-maps-in-light-skeleton.patch @@ -0,0 +1,80 @@ +From: Alexei Starovoitov +Date: Wed, 1 Dec 2021 10:10:33 -0800 +Subject: libbpf: Support init of inner maps in light skeleton. +Patch-mainline: v5.17-rc1 +Git-commit: be05c94476f3cf4fdc29feab4ed1053187323296 +References: jsc#PED-1368 + +Add ability to initialize inner maps in light skeleton. 
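+
+For context, a hedged sketch (names are illustrative) of the kind of
+BTF-defined map-in-map this enables under a light skeleton:
+
+  struct inner_map {
+	  __uint(type, BPF_MAP_TYPE_ARRAY);
+	  __uint(max_entries, 1);
+	  __type(key, __u32);
+	  __type(value, __u32);
+  } inner SEC(".maps");
+
+  struct {
+	  __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	  __uint(max_entries, 1);
+	  __uint(key_size, sizeof(__u32));
+	  __array(values, struct inner_map);
+  } outer SEC(".maps") = {
+	  .values = { [0] = &inner },
+  };
+
+Such slots previously hit the -ENOTSUP path in init_map_in_map_slots()
+when the object was loaded through gen_loader.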
+ +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-11-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf_gen_internal.h | 1 + + tools/lib/bpf/gen_loader.c | 27 +++++++++++++++++++++++++++ + tools/lib/bpf/libbpf.c | 6 +++--- + 3 files changed, 31 insertions(+), 3 deletions(-) + +--- a/tools/lib/bpf/bpf_gen_internal.h ++++ b/tools/lib/bpf/bpf_gen_internal.h +@@ -67,5 +67,6 @@ void bpf_gen__record_attach_target(struc + void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, + bool is_typeless, int kind, int insn_idx); + void bpf_gen__record_relo_core(struct bpf_gen *gen, const struct bpf_core_relo *core_relo); ++void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int key, int inner_map_idx); + + #endif +--- a/tools/lib/bpf/gen_loader.c ++++ b/tools/lib/bpf/gen_loader.c +@@ -1066,6 +1066,33 @@ void bpf_gen__map_update_elem(struct bpf + emit_check_err(gen); + } + ++void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slot, ++ int inner_map_idx) ++{ ++ int attr_size = offsetofend(union bpf_attr, flags); ++ int map_update_attr, key; ++ union bpf_attr attr; ++ ++ memset(&attr, 0, attr_size); ++ pr_debug("gen: populate_outer_map: outer %d key %d inner %d\n", ++ outer_map_idx, slot, inner_map_idx); ++ ++ key = add_data(gen, &slot, sizeof(slot)); ++ ++ map_update_attr = add_data(gen, &attr, attr_size); ++ move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, ++ blob_fd_array_off(gen, outer_map_idx)); ++ emit_rel_store(gen, attr_field(map_update_attr, key), key); ++ emit_rel_store(gen, attr_field(map_update_attr, value), ++ blob_fd_array_off(gen, inner_map_idx)); ++ ++ /* emit MAP_UPDATE_ELEM command */ ++ emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, map_update_attr, attr_size); ++ debug_ret(gen, "populate_outer_map outer %d key %d inner %d", ++ outer_map_idx, slot, inner_map_idx); ++ emit_check_err(gen); ++} ++ + void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) + { + int attr_size = offsetofend(union bpf_attr, map_fd); +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -4971,9 +4971,9 @@ static int init_map_in_map_slots(struct + fd = bpf_map__fd(targ_map); + + if (obj->gen_loader) { +- pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n", +- map - obj->maps, i, targ_map - obj->maps); +- return -ENOTSUP; ++ bpf_gen__populate_outer_map(obj->gen_loader, ++ map - obj->maps, i, ++ targ_map - obj->maps); + } else { + err = bpf_map_update_elem(map->fd, &i, &fd, 0); + } diff --git a/patches.suse/libbpf-Support-repeated-legacy-kprobes-on-same-funct.patch b/patches.suse/libbpf-Support-repeated-legacy-kprobes-on-same-funct.patch new file mode 100644 index 0000000..290bec9 --- /dev/null +++ b/patches.suse/libbpf-Support-repeated-legacy-kprobes-on-same-funct.patch @@ -0,0 +1,36 @@ +From: Qiang Wang +Date: Mon, 27 Dec 2021 21:07:13 +0800 +Subject: libbpf: Support repeated legacy kprobes on same function +Patch-mainline: v5.17-rc1 +Git-commit: 51a33c60f1c22c0d2dafad774315ba1537765442 +References: jsc#PED-1368 + +If legacy kprobes are attached repeatedly to the same function in one +process, libbpf registers them under the same probe name and gets an +-EBUSY error. So append an index to the probe name format to fix this +problem.
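+
+As a hedged illustration (PID, function and offset values are made up),
+two legacy kprobes on the same function in one process now get distinct
+event names instead of colliding:
+
+  libbpf_1234_do_sys_open_0x0_0
+  libbpf_1234_do_sys_open_0x0_1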
+ +Co-developed-by: Chengming Zhou +Signed-off-by: Qiang Wang +Signed-off-by: Chengming Zhou +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211227130713.66933-2-wangqiang.wq.frank@bytedance.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -9914,7 +9914,10 @@ static int append_to_file(const char *fi + static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, + const char *kfunc_name, size_t offset) + { +- snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), kfunc_name, offset); ++ static int index = 0; ++ ++ snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, ++ __sync_fetch_and_add(&index, 1)); + } + + static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, diff --git a/patches.suse/libbpf-Support-static-initialization-of-BPF_MAP_TYPE.patch b/patches.suse/libbpf-Support-static-initialization-of-BPF_MAP_TYPE.patch new file mode 100644 index 0000000..b8a006b --- /dev/null +++ b/patches.suse/libbpf-Support-static-initialization-of-BPF_MAP_TYPE.patch @@ -0,0 +1,310 @@ +From: Hengqi Chen +Date: Sun, 28 Nov 2021 22:16:32 +0800 +Subject: libbpf: Support static initialization of BPF_MAP_TYPE_PROG_ARRAY +Patch-mainline: v5.17-rc1 +Git-commit: 341ac5ffc4bd859103899c876902caf07cc97ea4 +References: jsc#PED-1368 + +Support static initialization of BPF_MAP_TYPE_PROG_ARRAY with a +syntax similar to map-in-map initialization ([0]): + + SEC("socket") + int tailcall_1(void *ctx) + { + return 0; + } + + struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 2); + __uint(key_size, sizeof(__u32)); + __array(values, int (void *)); + } prog_array_init SEC(".maps") = { + .values = { + [1] = (void *)&tailcall_1, + }, + }; + +Here's the relevant part of libbpf debug log showing what's +going on with prog-array initialization: + +libbpf: sec '.relsocket': collecting relocation for section(3) 'socket' +libbpf: sec '.relsocket': relo #0: insn #2 against 'prog_array_init' +libbpf: prog 'entry': found map 0 (prog_array_init, sec 4, off 0) for insn #0 +libbpf: .maps relo #0: for 3 value 0 rel->r_offset 32 name 53 ('tailcall_1') +libbpf: .maps relo #0: map 'prog_array_init' slot [1] points to prog 'tailcall_1' +libbpf: map 'prog_array_init': created successfully, fd=5 +libbpf: map 'prog_array_init': slot [1] set to prog 'tailcall_1' fd=6 + + [0] Closes: https://github.com/libbpf/libbpf/issues/354 + +Signed-off-by: Hengqi Chen +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211128141633.502339-2-hengqi.chen@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 154 ++++++++++++++++++++++++++++++++++++++----------- + 1 file changed, 121 insertions(+), 33 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -2277,6 +2277,9 @@ int parse_btf_map_def(const char *map_na + map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE; + } + else if (strcmp(name, "values") == 0) { ++ bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type); ++ bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY; ++ const char *desc = is_map_in_map ? 
"map-in-map inner" : "prog-array value"; + char inner_map_name[128]; + int err; + +@@ -2290,8 +2293,8 @@ int parse_btf_map_def(const char *map_na + map_name, name); + return -EINVAL; + } +- if (!bpf_map_type__is_map_in_map(map_def->map_type)) { +- pr_warn("map '%s': should be map-in-map.\n", ++ if (!is_map_in_map && !is_prog_array) { ++ pr_warn("map '%s': should be map-in-map or prog-array.\n", + map_name); + return -ENOTSUP; + } +@@ -2303,22 +2306,30 @@ int parse_btf_map_def(const char *map_na + map_def->value_size = 4; + t = btf__type_by_id(btf, m->type); + if (!t) { +- pr_warn("map '%s': map-in-map inner type [%d] not found.\n", +- map_name, m->type); ++ pr_warn("map '%s': %s type [%d] not found.\n", ++ map_name, desc, m->type); + return -EINVAL; + } + if (!btf_is_array(t) || btf_array(t)->nelems) { +- pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n", +- map_name); ++ pr_warn("map '%s': %s spec is not a zero-sized array.\n", ++ map_name, desc); + return -EINVAL; + } + t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL); + if (!btf_is_ptr(t)) { +- pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", +- map_name, btf_kind_str(t)); ++ pr_warn("map '%s': %s def is of unexpected kind %s.\n", ++ map_name, desc, btf_kind_str(t)); + return -EINVAL; + } + t = skip_mods_and_typedefs(btf, t->type, NULL); ++ if (is_prog_array) { ++ if (!btf_is_func_proto(t)) { ++ pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n", ++ map_name, btf_kind_str(t)); ++ return -EINVAL; ++ } ++ continue; ++ } + if (!btf_is_struct(t)) { + pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", + map_name, btf_kind_str(t)); +@@ -4940,7 +4951,7 @@ static int bpf_object__create_map(struct + return err; + } + +-static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) ++static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) + { + const struct bpf_map *targ_map; + unsigned int i; +@@ -4952,6 +4963,7 @@ static int init_map_slots(struct bpf_obj + + targ_map = map->init_slots[i]; + fd = bpf_map__fd(targ_map); ++ + if (obj->gen_loader) { + pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n", + map - obj->maps, i, targ_map - obj->maps); +@@ -4962,8 +4974,7 @@ static int init_map_slots(struct bpf_obj + if (err) { + err = -errno; + pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", +- map->name, i, targ_map->name, +- fd, err); ++ map->name, i, targ_map->name, fd, err); + return err; + } + pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", +@@ -4976,6 +4987,59 @@ static int init_map_slots(struct bpf_obj + return 0; + } + ++static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) ++{ ++ const struct bpf_program *targ_prog; ++ unsigned int i; ++ int fd, err; ++ ++ if (obj->gen_loader) ++ return -ENOTSUP; ++ ++ for (i = 0; i < map->init_slots_sz; i++) { ++ if (!map->init_slots[i]) ++ continue; ++ ++ targ_prog = map->init_slots[i]; ++ fd = bpf_program__fd(targ_prog); ++ ++ err = bpf_map_update_elem(map->fd, &i, &fd, 0); ++ if (err) { ++ err = -errno; ++ pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n", ++ map->name, i, targ_prog->name, fd, err); ++ return err; ++ } ++ pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", ++ map->name, i, targ_prog->name, fd); ++ } ++ ++ zfree(&map->init_slots); ++ map->init_slots_sz = 0; ++ ++ return 0; ++} ++ ++static int bpf_object_init_prog_arrays(struct bpf_object *obj) ++{ ++ struct bpf_map 
*map; ++ int i, err; ++ ++ for (i = 0; i < obj->nr_maps; i++) { ++ map = &obj->maps[i]; ++ ++ if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY) ++ continue; ++ ++ err = init_prog_array_slots(obj, map); ++ if (err < 0) { ++ zclose(map->fd); ++ return err; ++ } ++ } ++ return 0; ++} ++ + static int + bpf_object__create_maps(struct bpf_object *obj) + { +@@ -5042,8 +5106,8 @@ retry: + } + } + +- if (map->init_slots_sz) { +- err = init_map_slots(obj, map); ++ if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { ++ err = init_map_in_map_slots(obj, map); + if (err < 0) { + zclose(map->fd); + goto err_out; +@@ -6189,9 +6253,11 @@ static int bpf_object__collect_map_relos + int i, j, nrels, new_sz; + const struct btf_var_secinfo *vi = NULL; + const struct btf_type *sec, *var, *def; +- struct bpf_map *map = NULL, *targ_map; ++ struct bpf_map *map = NULL, *targ_map = NULL; ++ struct bpf_program *targ_prog = NULL; ++ bool is_prog_array, is_map_in_map; + const struct btf_member *member; +- const char *name, *mname; ++ const char *name, *mname, *type; + unsigned int moff; + Elf64_Sym *sym; + Elf64_Rel *rel; +@@ -6218,11 +6284,6 @@ static int bpf_object__collect_map_relos + return -LIBBPF_ERRNO__FORMAT; + } + name = elf_sym_str(obj, sym->st_name) ?: ""; +- if (sym->st_shndx != obj->efile.btf_maps_shndx) { +- pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", +- i, name); +- return -LIBBPF_ERRNO__RELOC; +- } + + pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n", + i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value, +@@ -6244,19 +6305,45 @@ static int bpf_object__collect_map_relos + return -EINVAL; + } + +- if (!bpf_map_type__is_map_in_map(map->def.type)) +- return -EINVAL; +- if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && +- map->def.key_size != sizeof(int)) { +- pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", +- i, map->name, sizeof(int)); ++ is_map_in_map = bpf_map_type__is_map_in_map(map->def.type); ++ is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY; ++ type = is_map_in_map ? 
"map" : "prog"; ++ if (is_map_in_map) { ++ if (sym->st_shndx != obj->efile.btf_maps_shndx) { ++ pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", ++ i, name); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && ++ map->def.key_size != sizeof(int)) { ++ pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", ++ i, map->name, sizeof(int)); ++ return -EINVAL; ++ } ++ targ_map = bpf_object__find_map_by_name(obj, name); ++ if (!targ_map) { ++ pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n", ++ i, name); ++ return -ESRCH; ++ } ++ } else if (is_prog_array) { ++ targ_prog = bpf_object__find_program_by_name(obj, name); ++ if (!targ_prog) { ++ pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n", ++ i, name); ++ return -ESRCH; ++ } ++ if (targ_prog->sec_idx != sym->st_shndx || ++ targ_prog->sec_insn_off * 8 != sym->st_value || ++ prog_is_subprog(obj, targ_prog)) { ++ pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n", ++ i, name); ++ return -LIBBPF_ERRNO__RELOC; ++ } ++ } else { + return -EINVAL; + } + +- targ_map = bpf_object__find_map_by_name(obj, name); +- if (!targ_map) +- return -ESRCH; +- + var = btf__type_by_id(obj->btf, vi->type); + def = skip_mods_and_typedefs(obj->btf, var->type, NULL); + if (btf_vlen(def) == 0) +@@ -6287,10 +6374,10 @@ static int bpf_object__collect_map_relos + (new_sz - map->init_slots_sz) * host_ptr_sz); + map->init_slots_sz = new_sz; + } +- map->init_slots[moff] = targ_map; ++ map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog; + +- pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n", +- i, map->name, moff, name); ++ pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n", ++ i, map->name, moff, type, name); + } + + return 0; +@@ -7302,6 +7389,7 @@ int bpf_object__load_xattr(struct bpf_ob + err = err ? : bpf_object__create_maps(obj); + err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path); + err = err ? : bpf_object__load_progs(obj, attr->log_level); ++ err = err ? : bpf_object_init_prog_arrays(obj); + + if (obj->gen_loader) { + /* reset FDs */ diff --git a/patches.suse/libbpf-Turn-btf_dedup_opts-into-OPTS-based-struct.patch b/patches.suse/libbpf-Turn-btf_dedup_opts-into-OPTS-based-struct.patch new file mode 100644 index 0000000..bedcd4a --- /dev/null +++ b/patches.suse/libbpf-Turn-btf_dedup_opts-into-OPTS-based-struct.patch @@ -0,0 +1,419 @@ +From: Andrii Nakryiko +Date: Wed, 10 Nov 2021 21:36:18 -0800 +Subject: libbpf: Turn btf_dedup_opts into OPTS-based struct +Patch-mainline: v5.17-rc1 +Git-commit: 957d350a8b94133d114a9b1ac3e79f1f77100681 +References: jsc#PED-1368 + +btf__dedup() and struct btf_dedup_opts were added before we figured out +OPTS mechanism. As such, btf_dedup_opts is non-extensible without +breaking an ABI and potentially crashing user application. + +Unfortunately, btf__dedup() and btf_dedup_opts are short and succinct +names that would be great to preserve and use going forward. So we use +___libbpf_override() macro approach, used previously for bpf_prog_load() +API, to define a new btf__dedup() variant that accepts only struct btf * +and struct btf_dedup_opts * arguments, and rename the old btf__dedup() +implementation into btf__dedup_deprecated(). This keeps both source and +binary compatibility with old and new applications. + +The biggest problem was struct btf_dedup_opts, which wasn't OPTS-based, +and as such doesn't have `size_t sz;` as a first field. 
But btf__dedup() +is a pretty rarely used API and I believe that the only currently known +users (besides selftests) are libbpf's own bpf_linker and pahole. +Neither use case actually uses options and just passes NULL. So instead +of doing extra hacks, just rewrite struct btf_dedup_opts into an OPTS-based +one, move the btf_ext argument into those opts (only bpf_linker needs to +dedup btf_ext, so it's not a typical thing to specify), and drop the +`dont_resolve_fwds` option (AFAIK it was never used anywhere, and it +makes BTF dedup much less useful and efficient). + +Just in case, the old implementation, btf__dedup_deprecated(), detects +non-NULL options and errors out with a helpful message, to help any users +still playing with btf__dedup() migrate. + +The last remaining piece is dedup_table_size, which is another +anachronism from the very early days of BTF dedup. Since then it has been +reduced to the only valid value, 1, to request forced hash collisions. +This is only used during testing, so instead introduce a bool flag to +force collisions explicitly. + +This patch also adapts the selftests to the new btf__dedup() and btf_dedup_opts +usage to avoid selftest breakage. + + [0] Closes: https://github.com/libbpf/libbpf/issues/281 + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211111053624.190580-4-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/btf.c | 46 ++++++++------- + tools/lib/bpf/btf.h | 20 +++++- + tools/lib/bpf/libbpf.map | 2 + tools/lib/bpf/linker.c | 4 - + tools/testing/selftests/bpf/prog_tests/btf.c | 46 ++------------- + tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c | 6 - + 6 files changed, 58 insertions(+), 66 deletions(-) + +--- a/tools/lib/bpf/btf.c ++++ b/tools/lib/bpf/btf.c +@@ -2846,8 +2846,7 @@ __u32 btf_ext__line_info_rec_size(const + + struct btf_dedup; + +-static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, +- const struct btf_dedup_opts *opts); ++static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts); + static void btf_dedup_free(struct btf_dedup *d); + static int btf_dedup_prep(struct btf_dedup *d); + static int btf_dedup_strings(struct btf_dedup *d); +@@ -2994,12 +2993,17 @@ static int btf_dedup_remap_types(struct + * deduplicating structs/unions is described in greater details in comments for + * `btf_dedup_is_equiv` function.
+ */ +-int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, +- const struct btf_dedup_opts *opts) ++ ++DEFAULT_VERSION(btf__dedup_v0_6_0, btf__dedup, LIBBPF_0.6.0) ++int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts) + { +- struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts); ++ struct btf_dedup *d; + int err; + ++ if (!OPTS_VALID(opts, btf_dedup_opts)) ++ return libbpf_err(-EINVAL); ++ ++ d = btf_dedup_new(btf, opts); + if (IS_ERR(d)) { + pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); + return libbpf_err(-EINVAL); +@@ -3051,6 +3055,19 @@ done: + return libbpf_err(err); + } + ++COMPAT_VERSION(bpf__dedup_deprecated, btf__dedup, LIBBPF_0.0.2) ++int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *unused_opts) ++{ ++ LIBBPF_OPTS(btf_dedup_opts, opts, .btf_ext = btf_ext); ++ ++ if (unused_opts) { ++ pr_warn("please use new version of btf__dedup() that supports options\n"); ++ return libbpf_err(-ENOTSUP); ++ } ++ ++ return btf__dedup(btf, &opts); ++} ++ + #define BTF_UNPROCESSED_ID ((__u32)-1) + #define BTF_IN_PROGRESS_ID ((__u32)-2) + +@@ -3163,8 +3180,7 @@ static bool btf_dedup_equal_fn(const voi + return k1 == k2; + } + +-static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, +- const struct btf_dedup_opts *opts) ++static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts) + { + struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup)); + hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn; +@@ -3173,13 +3189,11 @@ static struct btf_dedup *btf_dedup_new(s + if (!d) + return ERR_PTR(-ENOMEM); + +- d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds; +- /* dedup_table_size is now used only to force collisions in tests */ +- if (opts && opts->dedup_table_size == 1) ++ if (OPTS_GET(opts, force_collisions, false)) + hash_fn = btf_dedup_collision_hash_fn; + + d->btf = btf; +- d->btf_ext = btf_ext; ++ d->btf_ext = OPTS_GET(opts, btf_ext, NULL); + + d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL); + if (IS_ERR(d->dedup_table)) { +@@ -3708,8 +3722,6 @@ static int btf_dedup_prim_type(struct bt + new_id = cand_id; + break; + } +- if (d->opts.dont_resolve_fwds) +- continue; + if (btf_compat_enum(t, cand)) { + if (btf_is_enum_fwd(t)) { + /* resolve fwd to full enum */ +@@ -3952,8 +3964,7 @@ static int btf_dedup_is_equiv(struct btf + return 0; + + /* FWD <--> STRUCT/UNION equivalence check, if enabled */ +- if (!d->opts.dont_resolve_fwds +- && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) ++ if ((cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) + && cand_kind != canon_kind) { + __u16 real_kind; + __u16 fwd_kind; +@@ -3979,10 +3990,7 @@ static int btf_dedup_is_equiv(struct btf + return btf_equal_int_tag(cand_type, canon_type); + + case BTF_KIND_ENUM: +- if (d->opts.dont_resolve_fwds) +- return btf_equal_enum(cand_type, canon_type); +- else +- return btf_compat_enum(cand_type, canon_type); ++ return btf_compat_enum(cand_type, canon_type); + + case BTF_KIND_FWD: + case BTF_KIND_FLOAT: +--- a/tools/lib/bpf/btf.h ++++ b/tools/lib/bpf/btf.h +@@ -245,12 +245,24 @@ LIBBPF_API int btf__add_decl_tag(struct + int component_idx); + + struct btf_dedup_opts { +- unsigned int dedup_table_size; +- bool dont_resolve_fwds; ++ size_t sz; ++ /* optional .BTF.ext info to dedup along the main BTF info */ ++ struct btf_ext *btf_ext; ++ /* force hash collisions (used for testing) */ ++ bool force_collisions; ++ size_t :0; + }; ++#define btf_dedup_opts__last_field 
force_collisions + +-LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, +- const struct btf_dedup_opts *opts); ++LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts); ++ ++LIBBPF_API int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts); ++ ++LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__dedup() instead") ++LIBBPF_API int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *opts); ++#define btf__dedup(...) ___libbpf_overload(___btf_dedup, __VA_ARGS__) ++#define ___btf_dedup3(btf, btf_ext, opts) btf__dedup_deprecated(btf, btf_ext, opts) ++#define ___btf_dedup2(btf, opts) btf__dedup(btf, opts) + + struct btf_dump; + +--- a/tools/lib/bpf/libbpf.map ++++ b/tools/lib/bpf/libbpf.map +@@ -403,6 +403,8 @@ LIBBPF_0.6.0 { + bpf_program__set_extra_flags; + btf__add_btf; + btf__add_decl_tag; ++ btf__dedup; ++ btf__dedup_deprecated; + btf__raw_data; + btf__type_cnt; + } LIBBPF_0.5.0; +--- a/tools/lib/bpf/linker.c ++++ b/tools/lib/bpf/linker.c +@@ -2650,6 +2650,7 @@ static int emit_elf_data_sec(struct bpf_ + + static int finalize_btf(struct bpf_linker *linker) + { ++ LIBBPF_OPTS(btf_dedup_opts, opts); + struct btf *btf = linker->btf; + const void *raw_data; + int i, j, id, err; +@@ -2686,7 +2687,8 @@ static int finalize_btf(struct bpf_linke + return err; + } + +- err = btf__dedup(linker->btf, linker->btf_ext, NULL); ++ opts.btf_ext = linker->btf_ext; ++ err = btf__dedup(linker->btf, &opts); + if (err) { + pr_warn("BTF dedup failed: %d\n", err); + return err; +--- a/tools/testing/selftests/bpf/prog_tests/btf.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf.c +@@ -6627,7 +6627,7 @@ struct btf_dedup_test { + struct btf_dedup_opts opts; + }; + +-const struct btf_dedup_test dedup_tests[] = { ++static struct btf_dedup_test dedup_tests[] = { + + { + .descr = "dedup: unused strings filtering", +@@ -6647,9 +6647,6 @@ const struct btf_dedup_test dedup_tests[ + }, + BTF_STR_SEC("\0int\0long"), + }, +- .opts = { +- .dont_resolve_fwds = false, +- }, + }, + { + .descr = "dedup: strings deduplication", +@@ -6672,9 +6669,6 @@ const struct btf_dedup_test dedup_tests[ + }, + BTF_STR_SEC("\0int\0long int"), + }, +- .opts = { +- .dont_resolve_fwds = false, +- }, + }, + { + .descr = "dedup: struct example #1", +@@ -6755,9 +6749,6 @@ const struct btf_dedup_test dedup_tests[ + }, + BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"), + }, +- .opts = { +- .dont_resolve_fwds = false, +- }, + }, + { + .descr = "dedup: struct <-> fwd resolution w/ hash collision", +@@ -6800,8 +6791,7 @@ const struct btf_dedup_test dedup_tests[ + BTF_STR_SEC("\0s\0x"), + }, + .opts = { +- .dont_resolve_fwds = false, +- .dedup_table_size = 1, /* force hash collisions */ ++ .force_collisions = true, /* force hash collisions */ + }, + }, + { +@@ -6847,8 +6837,7 @@ const struct btf_dedup_test dedup_tests[ + BTF_STR_SEC("\0s\0x"), + }, + .opts = { +- .dont_resolve_fwds = false, +- .dedup_table_size = 1, /* force hash collisions */ ++ .force_collisions = true, /* force hash collisions */ + }, + }, + { +@@ -6911,9 +6900,6 @@ const struct btf_dedup_test dedup_tests[ + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q"), + }, +- .opts = { +- .dont_resolve_fwds = false, +- }, + }, + { + .descr = "dedup: no int/float duplicates", +@@ -6965,9 +6951,6 @@ const struct btf_dedup_test dedup_tests[ + }, + BTF_STR_SEC("\0int\0some other int\0float"), + }, +- .opts = { +- .dont_resolve_fwds = false, +- }, + }, + { + .descr = "dedup: enum fwd resolution", +@@ -7009,9 
+6992,6 @@ const struct btf_dedup_test dedup_tests[ + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, +- .opts = { +- .dont_resolve_fwds = false, +- }, + }, + { + .descr = "dedup: datasec and vars pass-through", +@@ -7054,8 +7034,7 @@ const struct btf_dedup_test dedup_tests[ + BTF_STR_SEC("\0.bss\0t"), + }, + .opts = { +- .dont_resolve_fwds = false, +- .dedup_table_size = 1 ++ .force_collisions = true + }, + }, + { +@@ -7099,9 +7078,6 @@ const struct btf_dedup_test dedup_tests[ + }, + BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"), + }, +- .opts = { +- .dont_resolve_fwds = false, +- }, + }, + { + .descr = "dedup: func/func_param tags", +@@ -7152,9 +7128,6 @@ const struct btf_dedup_test dedup_tests[ + }, + BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"), + }, +- .opts = { +- .dont_resolve_fwds = false, +- }, + }, + { + .descr = "dedup: struct/struct_member tags", +@@ -7200,9 +7173,6 @@ const struct btf_dedup_test dedup_tests[ + }, + BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"), + }, +- .opts = { +- .dont_resolve_fwds = false, +- }, + }, + { + .descr = "dedup: typedef tags", +@@ -7233,9 +7203,6 @@ const struct btf_dedup_test dedup_tests[ + }, + BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"), + }, +- .opts = { +- .dont_resolve_fwds = false, +- }, + }, + + }; +@@ -7293,7 +7260,7 @@ static void dump_btf_strings(const char + + static void do_test_dedup(unsigned int test_num) + { +- const struct btf_dedup_test *test = &dedup_tests[test_num - 1]; ++ struct btf_dedup_test *test = &dedup_tests[test_num - 1]; + __u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size; + const struct btf_header *test_hdr, *expect_hdr; + struct btf *test_btf = NULL, *expect_btf = NULL; +@@ -7337,7 +7304,8 @@ static void do_test_dedup(unsigned int t + goto done; + } + +- err = btf__dedup(test_btf, NULL, &test->opts); ++ test->opts.sz = sizeof(test->opts); ++ err = btf__dedup(test_btf, &test->opts); + if (CHECK(err, "btf_dedup failed errno:%d", err)) { + err = -1; + goto done; +--- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c +@@ -92,7 +92,7 @@ struct s2 {\n\ + int *f3;\n\ + };\n\n", "c_dump"); + +- err = btf__dedup(btf2, NULL, NULL); ++ err = btf__dedup(btf2, NULL); + if (!ASSERT_OK(err, "btf_dedup")) + goto cleanup; + +@@ -186,7 +186,7 @@ static void test_split_fwd_resolve() { + "\t'f1' type_id=7 bits_offset=0\n" + "\t'f2' type_id=9 bits_offset=64"); + +- err = btf__dedup(btf2, NULL, NULL); ++ err = btf__dedup(btf2, NULL); + if (!ASSERT_OK(err, "btf_dedup")) + goto cleanup; + +@@ -283,7 +283,7 @@ static void test_split_struct_duped() { + "[13] STRUCT 's3' size=8 vlen=1\n" + "\t'f1' type_id=12 bits_offset=0"); + +- err = btf__dedup(btf2, NULL, NULL); ++ err = btf__dedup(btf2, NULL); + if (!ASSERT_OK(err, "btf_dedup")) + goto cleanup; + diff --git a/patches.suse/libbpf-Unify-low-level-BPF_PROG_LOAD-APIs-into-bpf_p.patch b/patches.suse/libbpf-Unify-low-level-BPF_PROG_LOAD-APIs-into-bpf_p.patch new file mode 100644 index 0000000..8c573e4 --- /dev/null +++ b/patches.suse/libbpf-Unify-low-level-BPF_PROG_LOAD-APIs-into-bpf_p.patch @@ -0,0 +1,767 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 15:08:36 -0700 +Subject: libbpf: Unify low-level BPF_PROG_LOAD APIs into bpf_prog_load() +Patch-mainline: v5.17-rc1 +Git-commit: d10ef2b825cffd0807dd733fdfd6a5bea32270d7 +References: jsc#PED-1368 + +Add a new unified OPTS-based low-level API for program loading, +bpf_prog_load() ([0]). 
bpf_prog_load() accepts a few "mandatory" +parameters as input arguments (program type, name, license, +instructions), while all the other optional (as in not required to specify +for all types of BPF programs) fields go into struct bpf_prog_load_opts. + +This makes all the other non-extensible API variants for BPF_PROG_LOAD +obsolete; they are slated for deprecation in libbpf v0.7: + - bpf_load_program(); + - bpf_load_program_xattr(); + - bpf_verify_program(). + +Implementation-wise, the internal helper libbpf__bpf_prog_load() is refactored +to become the public bpf_prog_load() API. The struct bpf_prog_load_params used +internally is replaced by the public struct bpf_prog_load_opts. + +Unfortunately, while conceptually all this is pretty straightforward, +the biggest complication comes from the already existing bpf_prog_load() +*high-level* API, which has nothing to do with the BPF_PROG_LOAD command. + +We try really hard to have a new API named bpf_prog_load(), though, +because it maps naturally to the BPF_PROG_LOAD command. + +For that, we rename the old bpf_prog_load() to bpf_prog_load_deprecated() +and mark it as COMPAT_VERSION() for shared library users compiled +against an old version of libbpf. Statically linked users and shared lib +users compiled against a new version of libbpf headers will get "rerouted" +to bpf_prog_load_deprecated() through a macro helper that decides whether to +use the new or the old bpf_prog_load() based on the number of input arguments (see +___libbpf_overload in libbpf_common.h). + +To test that existing +bpf_prog_load()-using code compiles and works as expected, I compiled +and ran the selftests as-is. I had to remove (locally) the selftests/bpf/Makefile +-Dbpf_prog_load=bpf_prog_test_load hack because it was conflicting with +the macro-based overload approach. I don't expect anyone else to do +something like this in practice, though. This is a testing-specific way to +replace bpf_prog_load() calls with a special testing variant that +adds an extra prog_flags value. After testing I kept this selftests hack, +but ensured that it uses the new bpf_prog_load_deprecated name. + +This patch also marks bpf_prog_load() and bpf_prog_load_xattr() as deprecated. +The bpf_object interface has to be used for working with struct bpf_program; +libbpf doesn't support loading just a bpf_program. + +The silver lining is that when we get to libbpf 1.0 all these +complications will be gone and we'll have one clean bpf_prog_load() +low-level API with no backwards-compatibility hackery surrounding it.
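+
+For illustration, a minimal usage sketch of the new variant (not part of
+this patch; insns and insn_cnt stand for a BPF program prepared
+elsewhere, and the program type and name are arbitrary examples):
+
+	char log_buf[4096];
+	LIBBPF_OPTS(bpf_prog_load_opts, opts,
+		.log_buf = log_buf,
+		.log_size = sizeof(log_buf),
+		.log_level = 1,	/* always collect the verifier log */
+	);
+	int fd;
+
+	/* "mandatory" parameters are direct arguments, the rest is opts */
+	fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "sock_filter", "GPL",
+			   insns, insn_cnt, &opts);
+	if (fd < 0)	/* -errno-style error code */
+		fprintf(stderr, "BPF_PROG_LOAD failed: %d\n%s\n", fd, log_buf);
+
+Since bpf_prog_load_opts carries its own size in the sz field, new
+optional fields can later be appended to it without breaking existing
+callers of bpf_prog_load().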
+ + [0] Closes: https://github.com/libbpf/libbpf/issues/284 + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211103220845.2676888-4-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf.c | 149 ++++++++++++++++----------- + tools/lib/bpf/bpf.h | 73 ++++++++++++- + tools/lib/bpf/bpf_gen_internal.h | 8 +- + tools/lib/bpf/gen_loader.c | 30 +++--- + tools/lib/bpf/libbpf.c | 51 +++++---- + tools/lib/bpf/libbpf.h | 5 +- + tools/lib/bpf/libbpf.map | 2 + + tools/lib/bpf/libbpf_common.h | 12 +++ + tools/lib/bpf/libbpf_internal.h | 31 ------ + tools/testing/selftests/bpf/Makefile | 2 +- + 10 files changed, 223 insertions(+), 140 deletions(-) + +diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c +index 8e6a23c42560..8f2a701cb079 100644 +--- a/tools/lib/bpf/bpf.c ++++ b/tools/lib/bpf/bpf.c +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + #include "bpf.h" + #include "libbpf.h" + #include "libbpf_internal.h" +@@ -254,58 +255,91 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt, + return info; + } + +-int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) ++DEFAULT_VERSION(bpf_prog_load_v0_6_0, bpf_prog_load, LIBBPF_0.6.0) ++int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, ++ const char *prog_name, const char *license, ++ const struct bpf_insn *insns, size_t insn_cnt, ++ const struct bpf_prog_load_opts *opts) + { + void *finfo = NULL, *linfo = NULL; ++ const char *func_info, *line_info; ++ __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; ++ __u32 func_info_rec_size, line_info_rec_size; ++ int fd, attempts; + union bpf_attr attr; +- int fd; ++ char *log_buf; + +- if (!load_attr->log_buf != !load_attr->log_buf_sz) ++ if (!OPTS_VALID(opts, bpf_prog_load_opts)) + return libbpf_err(-EINVAL); + +- if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf)) ++ attempts = OPTS_GET(opts, attempts, 0); ++ if (attempts < 0) + return libbpf_err(-EINVAL); ++ if (attempts == 0) ++ attempts = PROG_LOAD_ATTEMPTS; + + memset(&attr, 0, sizeof(attr)); +- attr.prog_type = load_attr->prog_type; +- attr.expected_attach_type = load_attr->expected_attach_type; + +- if (load_attr->attach_prog_fd) +- attr.attach_prog_fd = load_attr->attach_prog_fd; ++ attr.prog_type = prog_type; ++ attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0); ++ ++ attr.prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0); ++ attr.prog_flags = OPTS_GET(opts, prog_flags, 0); ++ attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); ++ attr.kern_version = OPTS_GET(opts, kern_version, 0); ++ ++ if (prog_name) ++ strncat(attr.prog_name, prog_name, sizeof(attr.prog_name) - 1); ++ attr.license = ptr_to_u64(license); ++ ++ if (insn_cnt > UINT_MAX) ++ return libbpf_err(-E2BIG); ++ ++ attr.insns = ptr_to_u64(insns); ++ attr.insn_cnt = (__u32)insn_cnt; ++ ++ attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); ++ attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0); ++ ++ if (attach_prog_fd && attach_btf_obj_fd) ++ return libbpf_err(-EINVAL); ++ ++ attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0); ++ if (attach_prog_fd) ++ attr.attach_prog_fd = attach_prog_fd; + else +- attr.attach_btf_obj_fd = load_attr->attach_btf_obj_fd; +- attr.attach_btf_id = load_attr->attach_btf_id; ++ attr.attach_btf_obj_fd = attach_btf_obj_fd; + +- attr.prog_ifindex = load_attr->prog_ifindex; +- attr.kern_version = load_attr->kern_version; ++ log_buf = OPTS_GET(opts, log_buf, NULL); ++ log_size = OPTS_GET(opts, 
log_size, 0); ++ log_level = OPTS_GET(opts, log_level, 0); + +- attr.insn_cnt = (__u32)load_attr->insn_cnt; +- attr.insns = ptr_to_u64(load_attr->insns); +- attr.license = ptr_to_u64(load_attr->license); ++ if (!!log_buf != !!log_size) ++ return libbpf_err(-EINVAL); ++ if (log_level > (4 | 2 | 1)) ++ return libbpf_err(-EINVAL); ++ if (log_level && !log_buf) ++ return libbpf_err(-EINVAL); + +- attr.log_level = load_attr->log_level; +- if (attr.log_level) { +- attr.log_buf = ptr_to_u64(load_attr->log_buf); +- attr.log_size = load_attr->log_buf_sz; +- } ++ attr.log_level = log_level; ++ attr.log_buf = ptr_to_u64(log_buf); ++ attr.log_size = log_size; + +- attr.prog_btf_fd = load_attr->prog_btf_fd; +- attr.prog_flags = load_attr->prog_flags; ++ func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0); ++ func_info = OPTS_GET(opts, func_info, NULL); ++ attr.func_info_rec_size = func_info_rec_size; ++ attr.func_info = ptr_to_u64(func_info); ++ attr.func_info_cnt = OPTS_GET(opts, func_info_cnt, 0); + +- attr.func_info_rec_size = load_attr->func_info_rec_size; +- attr.func_info_cnt = load_attr->func_info_cnt; +- attr.func_info = ptr_to_u64(load_attr->func_info); ++ line_info_rec_size = OPTS_GET(opts, line_info_rec_size, 0); ++ line_info = OPTS_GET(opts, line_info, NULL); ++ attr.line_info_rec_size = line_info_rec_size; ++ attr.line_info = ptr_to_u64(line_info); ++ attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0); + +- attr.line_info_rec_size = load_attr->line_info_rec_size; +- attr.line_info_cnt = load_attr->line_info_cnt; +- attr.line_info = ptr_to_u64(load_attr->line_info); +- attr.fd_array = ptr_to_u64(load_attr->fd_array); ++ attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL)); + +- if (load_attr->name) +- memcpy(attr.prog_name, load_attr->name, +- min(strlen(load_attr->name), (size_t)BPF_OBJ_NAME_LEN - 1)); +- +- fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS); ++ fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + if (fd >= 0) + return fd; + +@@ -315,11 +349,11 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) + */ + while (errno == E2BIG && (!finfo || !linfo)) { + if (!finfo && attr.func_info_cnt && +- attr.func_info_rec_size < load_attr->func_info_rec_size) { ++ attr.func_info_rec_size < func_info_rec_size) { + /* try with corrected func info records */ +- finfo = alloc_zero_tailing_info(load_attr->func_info, +- load_attr->func_info_cnt, +- load_attr->func_info_rec_size, ++ finfo = alloc_zero_tailing_info(func_info, ++ attr.func_info_cnt, ++ func_info_rec_size, + attr.func_info_rec_size); + if (!finfo) { + errno = E2BIG; +@@ -327,13 +361,12 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) + } + + attr.func_info = ptr_to_u64(finfo); +- attr.func_info_rec_size = load_attr->func_info_rec_size; ++ attr.func_info_rec_size = func_info_rec_size; + } else if (!linfo && attr.line_info_cnt && +- attr.line_info_rec_size < +- load_attr->line_info_rec_size) { +- linfo = alloc_zero_tailing_info(load_attr->line_info, +- load_attr->line_info_cnt, +- load_attr->line_info_rec_size, ++ attr.line_info_rec_size < line_info_rec_size) { ++ linfo = alloc_zero_tailing_info(line_info, ++ attr.line_info_cnt, ++ line_info_rec_size, + attr.line_info_rec_size); + if (!linfo) { + errno = E2BIG; +@@ -341,26 +374,26 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) + } + + attr.line_info = ptr_to_u64(linfo); +- attr.line_info_rec_size = load_attr->line_info_rec_size; ++ attr.line_info_rec_size = 
line_info_rec_size; + } else { + break; + } + +- fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS); ++ fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + if (fd >= 0) + goto done; + } + +- if (load_attr->log_level || !load_attr->log_buf) ++ if (log_level || !log_buf) + goto done; + + /* Try again with log */ +- attr.log_buf = ptr_to_u64(load_attr->log_buf); +- attr.log_size = load_attr->log_buf_sz; ++ log_buf[0] = 0; ++ attr.log_buf = ptr_to_u64(log_buf); ++ attr.log_size = log_size; + attr.log_level = 1; +- load_attr->log_buf[0] = 0; + +- fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS); ++ fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + done: + /* free() doesn't affect errno, so we don't need to restore it */ + free(finfo); +@@ -371,14 +404,13 @@ done: + int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, + char *log_buf, size_t log_buf_sz) + { +- struct bpf_prog_load_params p = {}; ++ LIBBPF_OPTS(bpf_prog_load_opts, p); + + if (!load_attr || !log_buf != !log_buf_sz) + return libbpf_err(-EINVAL); + +- p.prog_type = load_attr->prog_type; + p.expected_attach_type = load_attr->expected_attach_type; +- switch (p.prog_type) { ++ switch (load_attr->prog_type) { + case BPF_PROG_TYPE_STRUCT_OPS: + case BPF_PROG_TYPE_LSM: + p.attach_btf_id = load_attr->attach_btf_id; +@@ -392,12 +424,9 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, + p.prog_ifindex = load_attr->prog_ifindex; + p.kern_version = load_attr->kern_version; + } +- p.insn_cnt = load_attr->insns_cnt; +- p.insns = load_attr->insns; +- p.license = load_attr->license; + p.log_level = load_attr->log_level; + p.log_buf = log_buf; +- p.log_buf_sz = log_buf_sz; ++ p.log_size = log_buf_sz; + p.prog_btf_fd = load_attr->prog_btf_fd; + p.func_info_rec_size = load_attr->func_info_rec_size; + p.func_info_cnt = load_attr->func_info_cnt; +@@ -405,10 +434,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, + p.line_info_rec_size = load_attr->line_info_rec_size; + p.line_info_cnt = load_attr->line_info_cnt; + p.line_info = load_attr->line_info; +- p.name = load_attr->name; + p.prog_flags = load_attr->prog_flags; + +- return libbpf__bpf_prog_load(&p); ++ return bpf_prog_load(load_attr->prog_type, load_attr->name, load_attr->license, ++ load_attr->insns, load_attr->insns_cnt, &p); + } + + int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, +diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h +index f35146c1d9a9..079cc81ac51e 100644 +--- a/tools/lib/bpf/bpf.h ++++ b/tools/lib/bpf/bpf.h +@@ -72,6 +72,71 @@ LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type, + int inner_map_fd, int max_entries, + __u32 map_flags); + ++struct bpf_prog_load_opts { ++ size_t sz; /* size of this struct for forward/backward compatibility */ ++ ++ /* libbpf can retry BPF_PROG_LOAD command if bpf() syscall returns ++ * -EAGAIN. This field determines how many attempts libbpf has to ++ * make. If not specified, libbpf will use default value of 5. 
++ */ ++ int attempts; ++ ++ enum bpf_attach_type expected_attach_type; ++ __u32 prog_btf_fd; ++ __u32 prog_flags; ++ __u32 prog_ifindex; ++ __u32 kern_version; ++ ++ __u32 attach_btf_id; ++ __u32 attach_prog_fd; ++ __u32 attach_btf_obj_fd; ++ ++ const int *fd_array; ++ ++ /* .BTF.ext func info data */ ++ const void *func_info; ++ __u32 func_info_cnt; ++ __u32 func_info_rec_size; ++ ++ /* .BTF.ext line info data */ ++ const void *line_info; ++ __u32 line_info_cnt; ++ __u32 line_info_rec_size; ++ ++ /* verifier log options */ ++ __u32 log_level; ++ __u32 log_size; ++ char *log_buf; ++}; ++#define bpf_prog_load_opts__last_field log_buf ++ ++LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, ++ const char *prog_name, const char *license, ++ const struct bpf_insn *insns, size_t insn_cnt, ++ const struct bpf_prog_load_opts *opts); ++/* this "specialization" should go away in libbpf 1.0 */ ++LIBBPF_API int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, ++ const char *prog_name, const char *license, ++ const struct bpf_insn *insns, size_t insn_cnt, ++ const struct bpf_prog_load_opts *opts); ++ ++/* This is an elaborate way to not conflict with deprecated bpf_prog_load() ++ * API, defined in libbpf.h. Once we hit libbpf 1.0, all this will be gone. ++ * With this approach, if someone is calling bpf_prog_load() with ++ * 4 arguments, they will use the deprecated API, which keeps backwards ++ * compatibility (both source code and binary). If bpf_prog_load() is called ++ * with 6 arguments, though, it gets redirected to __bpf_prog_load. ++ * So looking forward to libbpf 1.0 when this hack will be gone and ++ * __bpf_prog_load() will be called just bpf_prog_load(). ++ */ ++#ifndef bpf_prog_load ++#define bpf_prog_load(...) ___libbpf_overload(___bpf_prog_load, __VA_ARGS__) ++#define ___bpf_prog_load4(file, type, pobj, prog_fd) \ ++ bpf_prog_load_deprecated(file, type, pobj, prog_fd) ++#define ___bpf_prog_load6(prog_type, prog_name, license, insns, insn_cnt, opts) \ ++ bpf_prog_load(prog_type, prog_name, license, insns, insn_cnt, opts) ++#endif /* bpf_prog_load */ ++ + struct bpf_load_program_attr { + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; +@@ -103,13 +168,15 @@ struct bpf_load_program_attr { + + /* Recommend log buffer size */ + #define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */ +-LIBBPF_API int +-bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, +- char *log_buf, size_t log_buf_sz); ++LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") ++LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, ++ char *log_buf, size_t log_buf_sz); ++LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") + LIBBPF_API int bpf_load_program(enum bpf_prog_type type, + const struct bpf_insn *insns, size_t insns_cnt, + const char *license, __u32 kern_version, + char *log_buf, size_t log_buf_sz); ++LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") + LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, + const struct bpf_insn *insns, + size_t insns_cnt, __u32 prog_flags, +diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h +index d26e5472fe50..75ca9fb857b2 100644 +--- a/tools/lib/bpf/bpf_gen_internal.h ++++ b/tools/lib/bpf/bpf_gen_internal.h +@@ -3,6 +3,8 @@ + #ifndef __BPF_GEN_INTERNAL_H + #define __BPF_GEN_INTERNAL_H + ++#include "bpf.h" ++ + struct ksym_relo_desc { + const char *name; + int kind; +@@ -50,8 +52,10 @@ int 
bpf_gen__finish(struct bpf_gen *gen); + void bpf_gen__free(struct bpf_gen *gen); + void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size); + void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_params *map_attr, int map_idx); +-struct bpf_prog_load_params; +-void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx); ++void bpf_gen__prog_load(struct bpf_gen *gen, ++ enum bpf_prog_type prog_type, const char *prog_name, ++ const char *license, struct bpf_insn *insns, size_t insn_cnt, ++ struct bpf_prog_load_opts *load_attr, int prog_idx); + void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size); + void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx); + void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type); +diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c +index 2e10776b6d85..7b73f97b1fa1 100644 +--- a/tools/lib/bpf/gen_loader.c ++++ b/tools/lib/bpf/gen_loader.c +@@ -901,27 +901,27 @@ static void cleanup_relos(struct bpf_gen *gen, int insns) + } + + void bpf_gen__prog_load(struct bpf_gen *gen, +- struct bpf_prog_load_params *load_attr, int prog_idx) ++ enum bpf_prog_type prog_type, const char *prog_name, ++ const char *license, struct bpf_insn *insns, size_t insn_cnt, ++ struct bpf_prog_load_opts *load_attr, int prog_idx) + { + int attr_size = offsetofend(union bpf_attr, fd_array); +- int prog_load_attr, license, insns, func_info, line_info; ++ int prog_load_attr, license_off, insns_off, func_info, line_info; + union bpf_attr attr; + + memset(&attr, 0, attr_size); +- pr_debug("gen: prog_load: type %d insns_cnt %zd\n", +- load_attr->prog_type, load_attr->insn_cnt); ++ pr_debug("gen: prog_load: type %d insns_cnt %zd\n", prog_type, insn_cnt); + /* add license string to blob of bytes */ +- license = add_data(gen, load_attr->license, strlen(load_attr->license) + 1); ++ license_off = add_data(gen, license, strlen(license) + 1); + /* add insns to blob of bytes */ +- insns = add_data(gen, load_attr->insns, +- load_attr->insn_cnt * sizeof(struct bpf_insn)); ++ insns_off = add_data(gen, insns, insn_cnt * sizeof(struct bpf_insn)); + +- attr.prog_type = load_attr->prog_type; ++ attr.prog_type = prog_type; + attr.expected_attach_type = load_attr->expected_attach_type; + attr.attach_btf_id = load_attr->attach_btf_id; + attr.prog_ifindex = load_attr->prog_ifindex; + attr.kern_version = 0; +- attr.insn_cnt = (__u32)load_attr->insn_cnt; ++ attr.insn_cnt = (__u32)insn_cnt; + attr.prog_flags = load_attr->prog_flags; + + attr.func_info_rec_size = load_attr->func_info_rec_size; +@@ -934,15 +934,15 @@ void bpf_gen__prog_load(struct bpf_gen *gen, + line_info = add_data(gen, load_attr->line_info, + attr.line_info_cnt * attr.line_info_rec_size); + +- memcpy(attr.prog_name, load_attr->name, +- min((unsigned)strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1)); ++ memcpy(attr.prog_name, prog_name, ++ min((unsigned)strlen(prog_name), BPF_OBJ_NAME_LEN - 1)); + prog_load_attr = add_data(gen, &attr, attr_size); + + /* populate union bpf_attr with a pointer to license */ +- emit_rel_store(gen, attr_field(prog_load_attr, license), license); ++ emit_rel_store(gen, attr_field(prog_load_attr, license), license_off); + + /* populate union bpf_attr with a pointer to instructions */ +- emit_rel_store(gen, attr_field(prog_load_attr, insns), insns); ++ emit_rel_store(gen, attr_field(prog_load_attr, insns), insns_off); + + /* populate union 
bpf_attr with a pointer to func_info */ + emit_rel_store(gen, attr_field(prog_load_attr, func_info), func_info); +@@ -974,12 +974,12 @@ void bpf_gen__prog_load(struct bpf_gen *gen, + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, + offsetof(union bpf_attr, attach_btf_obj_fd))); + } +- emit_relos(gen, insns); ++ emit_relos(gen, insns_off); + /* emit PROG_LOAD command */ + emit_sys_bpf(gen, BPF_PROG_LOAD, prog_load_attr, attr_size); + debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt); + /* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */ +- cleanup_relos(gen, insns); ++ cleanup_relos(gen, insns_off); + if (gen->attach_kind) + emit_sys_close_blob(gen, + attr_field(prog_load_attr, attach_btf_obj_fd)); +diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c +index 7fcea11ecaa9..7a82b81b8859 100644 +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -221,7 +221,7 @@ struct reloc_desc { + struct bpf_sec_def; + + typedef int (*init_fn_t)(struct bpf_program *prog, long cookie); +-typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_params *attr, long cookie); ++typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_opts *opts, long cookie); + typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog, long cookie); + + /* stored as sec_def->cookie for all libbpf-supported SEC()s */ +@@ -6391,16 +6391,16 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac + + /* this is called as prog->sec_def->preload_fn for libbpf-supported sec_defs */ + static int libbpf_preload_prog(struct bpf_program *prog, +- struct bpf_prog_load_params *attr, long cookie) ++ struct bpf_prog_load_opts *opts, long cookie) + { + enum sec_def_flags def = cookie; + + /* old kernels might not support specifying expected_attach_type */ + if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) +- attr->expected_attach_type = 0; ++ opts->expected_attach_type = 0; + + if (def & SEC_SLEEPABLE) +- attr->prog_flags |= BPF_F_SLEEPABLE; ++ opts->prog_flags |= BPF_F_SLEEPABLE; + + if ((prog->type == BPF_PROG_TYPE_TRACING || + prog->type == BPF_PROG_TYPE_LSM || +@@ -6419,11 +6419,11 @@ static int libbpf_preload_prog(struct bpf_program *prog, + + /* but by now libbpf common logic is not utilizing + * prog->atach_btf_obj_fd/prog->attach_btf_id anymore because +- * this callback is called after attrs were populated by +- * libbpf, so this callback has to update attr explicitly here ++ * this callback is called after opts were populated by ++ * libbpf, so this callback has to update opts explicitly here + */ +- attr->attach_btf_obj_fd = btf_obj_fd; +- attr->attach_btf_id = btf_type_id; ++ opts->attach_btf_obj_fd = btf_obj_fd; ++ opts->attach_btf_id = btf_type_id; + } + return 0; + } +@@ -6433,7 +6433,8 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog + const char *license, __u32 kern_version, + int *prog_fd) + { +- struct bpf_prog_load_params load_attr = {}; ++ LIBBPF_OPTS(bpf_prog_load_opts, load_attr); ++ const char *prog_name = NULL; + char *cp, errmsg[STRERR_BUFSIZE]; + size_t log_buf_size = 0; + char *log_buf = NULL; +@@ -6452,13 +6453,9 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog + if (!insns || !insns_cnt) + return -EINVAL; + +- load_attr.prog_type = prog->type; + load_attr.expected_attach_type = prog->expected_attach_type; + if (kernel_supports(obj, FEAT_PROG_NAME)) +- 
load_attr.name = prog->name; +- load_attr.insns = insns; +- load_attr.insn_cnt = insns_cnt; +- load_attr.license = license; ++ prog_name = prog->name; + load_attr.attach_btf_id = prog->attach_btf_id; + load_attr.attach_prog_fd = prog->attach_prog_fd; + load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; +@@ -6492,7 +6489,8 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog + } + + if (obj->gen_loader) { +- bpf_gen__prog_load(obj->gen_loader, &load_attr, ++ bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, ++ license, insns, insns_cnt, &load_attr, + prog - obj->programs); + *prog_fd = -1; + return 0; +@@ -6507,8 +6505,8 @@ retry_load: + } + + load_attr.log_buf = log_buf; +- load_attr.log_buf_sz = log_buf_size; +- ret = libbpf__bpf_prog_load(&load_attr); ++ load_attr.log_size = log_buf_size; ++ ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr); + + if (ret >= 0) { + if (log_buf && load_attr.log_level) +@@ -6554,19 +6552,19 @@ retry_load: + pr_warn("-- BEGIN DUMP LOG ---\n"); + pr_warn("\n%s\n", log_buf); + pr_warn("-- END LOG --\n"); +- } else if (load_attr.insn_cnt >= BPF_MAXINSNS) { +- pr_warn("Program too large (%zu insns), at most %d insns\n", +- load_attr.insn_cnt, BPF_MAXINSNS); ++ } else if (insns_cnt >= BPF_MAXINSNS) { ++ pr_warn("Program too large (%d insns), at most %d insns\n", ++ insns_cnt, BPF_MAXINSNS); + ret = -LIBBPF_ERRNO__PROG2BIG; +- } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) { ++ } else if (prog->type != BPF_PROG_TYPE_KPROBE) { + /* Wrong program type? */ + int fd; + +- load_attr.prog_type = BPF_PROG_TYPE_KPROBE; + load_attr.expected_attach_type = 0; + load_attr.log_buf = NULL; +- load_attr.log_buf_sz = 0; +- fd = libbpf__bpf_prog_load(&load_attr); ++ load_attr.log_size = 0; ++ fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, prog_name, license, ++ insns, insns_cnt, &load_attr); + if (fd >= 0) { + close(fd); + ret = -LIBBPF_ERRNO__PROGTYPE; +@@ -9170,8 +9168,9 @@ long libbpf_get_error(const void *ptr) + return -errno; + } + +-int bpf_prog_load(const char *file, enum bpf_prog_type type, +- struct bpf_object **pobj, int *prog_fd) ++COMPAT_VERSION(bpf_prog_load_deprecated, bpf_prog_load, LIBBPF_0.0.1) ++int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, ++ struct bpf_object **pobj, int *prog_fd) + { + struct bpf_prog_load_attr attr; + +diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h +index a364c379b998..bbc828667b22 100644 +--- a/tools/lib/bpf/libbpf.h ++++ b/tools/lib/bpf/libbpf.h +@@ -676,8 +676,9 @@ struct bpf_prog_load_attr { + + LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, + struct bpf_object **pobj, int *prog_fd); +-LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, +- struct bpf_object **pobj, int *prog_fd); ++LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open() and bpf_object__load() instead") ++LIBBPF_API int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, ++ struct bpf_object **pobj, int *prog_fd); + + /* XDP related API */ + struct xdp_link_info { +diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map +index 43580eb47740..b895861a13c0 100644 +--- a/tools/lib/bpf/libbpf.map ++++ b/tools/lib/bpf/libbpf.map +@@ -395,6 +395,8 @@ LIBBPF_0.6.0 { + bpf_object__next_program; + bpf_object__prev_map; + bpf_object__prev_program; ++ bpf_prog_load_deprecated; ++ bpf_prog_load; + bpf_program__insn_cnt; + bpf_program__insns; + btf__add_btf; +diff --git a/tools/lib/bpf/libbpf_common.h 
b/tools/lib/bpf/libbpf_common.h +index 0967112b933a..b21cefc9c3b6 100644 +--- a/tools/lib/bpf/libbpf_common.h ++++ b/tools/lib/bpf/libbpf_common.h +@@ -41,6 +41,18 @@ + #define __LIBBPF_MARK_DEPRECATED_0_7(X) + #endif + ++/* This set of internal macros allows us to do "function overloading" based on ++ * the number of arguments provided by the user, in a backwards-compatible way, ++ * during the transition to libbpf 1.0. ++ * It's an ugly but necessary evil that will be cleaned up when we get to 1.0. ++ * See the bpf_prog_load() overload for an example. ++ */ ++#define ___libbpf_cat(A, B) A ## B ++#define ___libbpf_select(NAME, NUM) ___libbpf_cat(NAME, NUM) ++#define ___libbpf_nth(_1, _2, _3, _4, _5, _6, N, ...) N ++#define ___libbpf_cnt(...) ___libbpf_nth(__VA_ARGS__, 6, 5, 4, 3, 2, 1) ++#define ___libbpf_overload(NAME, ...) ___libbpf_select(NAME, ___libbpf_cnt(__VA_ARGS__))(__VA_ARGS__) ++ + /* Helper macro to declare and initialize libbpf options struct + * + * This dance with uninitialized declaration, followed by memset to zero, +diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h +index aeb79e3a8ff9..c1e34794b829 100644 +--- a/tools/lib/bpf/libbpf_internal.h ++++ b/tools/lib/bpf/libbpf_internal.h +@@ -276,37 +276,6 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); + int libbpf__load_raw_btf(const char *raw_types, size_t types_len, + const char *str_sec, size_t str_len); + +-struct bpf_prog_load_params { +- enum bpf_prog_type prog_type; +- enum bpf_attach_type expected_attach_type; +- const char *name; +- const struct bpf_insn *insns; +- size_t insn_cnt; +- const char *license; +- __u32 kern_version; +- __u32 attach_prog_fd; +- __u32 attach_btf_obj_fd; +- __u32 attach_btf_id; +- __u32 prog_ifindex; +- __u32 prog_btf_fd; +- __u32 prog_flags; +- +- __u32 func_info_rec_size; +- const void *func_info; +- __u32 func_info_cnt; +- +- __u32 line_info_rec_size; +- const void *line_info; +- __u32 line_info_cnt; +- +- __u32 log_level; +- char *log_buf; +- size_t log_buf_sz; +- int *fd_array; +-}; +- +-int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr); +- + struct bpf_create_map_params { + const char *name; + enum bpf_map_type map_type; +diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile +index 54b0a41a3775..c4497a4af3fe 100644 +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -24,7 +24,7 @@ SAN_CFLAGS ?= + CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \ + -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ + -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) \ +- -Dbpf_prog_load=bpf_prog_test_load \ ++ -Dbpf_prog_load_deprecated=bpf_prog_test_load \ + -Dbpf_load_program=bpf_test_load_program + LDLIBS += -lcap -lelf -lz -lrt -lpthread + +-- +2.38.1 + diff --git a/patches.suse/libbpf-Unify-low-level-map-creation-APIs-w-new-bpf_m.patch b/patches.suse/libbpf-Unify-low-level-map-creation-APIs-w-new-bpf_m.patch new file mode 100644 index 0000000..8419e3a --- /dev/null +++ b/patches.suse/libbpf-Unify-low-level-map-creation-APIs-w-new-bpf_m.patch @@ -0,0 +1,505 @@ +From: Andrii Nakryiko +Date: Wed, 24 Nov 2021 11:32:30 -0800 +Subject: libbpf: Unify low-level map creation APIs w/ new bpf_map_create() +Patch-mainline: v5.17-rc1 +Git-commit: 992c4225419a38663d6239bc2f525b4ac0429188 +References: jsc#PED-1368 + +Mark the entire zoo of low-level map creation APIs for deprecation in +libbpf 0.7 ([0]) and introduce a new bpf_map_create() API that is +OPTS-based (and thus
future-proof) and matches the BPF_MAP_CREATE +command name. + +While at it, ensure that gen_loader sends the map_extra field. Also remove +the now-unneeded btf_key_type_id/btf_value_type_id logic that libbpf is +doing anyway. + + [0] Closes: https://github.com/libbpf/libbpf/issues/282 + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211124193233.3115996-2-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf.c | 136 ++++++++++++++++----------------- + tools/lib/bpf/bpf.h | 33 ++++++++- + tools/lib/bpf/bpf_gen_internal.h | 5 + + tools/lib/bpf/gen_loader.c | 46 ++++--------- + tools/lib/bpf/libbpf.c | 33 ++++----- + tools/lib/bpf/libbpf.map | 1 + tools/lib/bpf/libbpf_internal.h | 21 ------ + 7 files changed, 124 insertions(+), 151 deletions(-) + +--- a/tools/lib/bpf/bpf.c ++++ b/tools/lib/bpf/bpf.c +@@ -88,146 +88,122 @@ static inline int sys_bpf_prog_load(unio + return fd; + } + +-int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr) ++int bpf_map_create(enum bpf_map_type map_type, ++ const char *map_name, ++ __u32 key_size, ++ __u32 value_size, ++ __u32 max_entries, ++ const struct bpf_map_create_opts *opts) + { ++ const size_t attr_sz = offsetofend(union bpf_attr, map_extra); + union bpf_attr attr; + int fd; + +- memset(&attr, '\0', sizeof(attr)); ++ memset(&attr, 0, attr_sz); + +- attr.map_type = create_attr->map_type; +- attr.key_size = create_attr->key_size; +- attr.value_size = create_attr->value_size; +- attr.max_entries = create_attr->max_entries; +- attr.map_flags = create_attr->map_flags; +- if (create_attr->name) +- memcpy(attr.map_name, create_attr->name, +- min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1)); +- attr.numa_node = create_attr->numa_node; +- attr.btf_fd = create_attr->btf_fd; +- attr.btf_key_type_id = create_attr->btf_key_type_id; +- attr.btf_value_type_id = create_attr->btf_value_type_id; +- attr.map_ifindex = create_attr->map_ifindex; +- if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS) +- attr.btf_vmlinux_value_type_id = +- create_attr->btf_vmlinux_value_type_id; +- else +- attr.inner_map_fd = create_attr->inner_map_fd; +- attr.map_extra = create_attr->map_extra; ++ if (!OPTS_VALID(opts, bpf_map_create_opts)) ++ return libbpf_err(-EINVAL); ++ ++ attr.map_type = map_type; ++ if (map_name) ++ strncat(attr.map_name, map_name, sizeof(attr.map_name) - 1); ++ attr.key_size = key_size; ++ attr.value_size = value_size; ++ attr.max_entries = max_entries; + +- fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr)); ++ attr.btf_fd = OPTS_GET(opts, btf_fd, 0); ++ attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0); ++ attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0); ++ attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0); ++ ++ attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0); ++ attr.map_flags = OPTS_GET(opts, map_flags, 0); ++ attr.map_extra = OPTS_GET(opts, map_extra, 0); ++ attr.numa_node = OPTS_GET(opts, numa_node, 0); ++ attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); ++ ++ fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); + return libbpf_err_errno(fd); + } + + int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) + { +- struct bpf_create_map_params p = {}; ++ LIBBPF_OPTS(bpf_map_create_opts, p); + +- p.map_type = create_attr->map_type; +- p.key_size = create_attr->key_size; +- p.value_size = create_attr->value_size; +- p.max_entries = create_attr->max_entries; + p.map_flags = create_attr->map_flags; +- p.name =
create_attr->name; + p.numa_node = create_attr->numa_node; + p.btf_fd = create_attr->btf_fd; + p.btf_key_type_id = create_attr->btf_key_type_id; + p.btf_value_type_id = create_attr->btf_value_type_id; + p.map_ifindex = create_attr->map_ifindex; +- if (p.map_type == BPF_MAP_TYPE_STRUCT_OPS) +- p.btf_vmlinux_value_type_id = +- create_attr->btf_vmlinux_value_type_id; ++ if (create_attr->map_type == BPF_MAP_TYPE_STRUCT_OPS) ++ p.btf_vmlinux_value_type_id = create_attr->btf_vmlinux_value_type_id; + else + p.inner_map_fd = create_attr->inner_map_fd; + +- return libbpf__bpf_create_map_xattr(&p); ++ return bpf_map_create(create_attr->map_type, create_attr->name, ++ create_attr->key_size, create_attr->value_size, ++ create_attr->max_entries, &p); + } + + int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags, int node) + { +- struct bpf_create_map_attr map_attr = {}; ++ LIBBPF_OPTS(bpf_map_create_opts, opts); + +- map_attr.name = name; +- map_attr.map_type = map_type; +- map_attr.map_flags = map_flags; +- map_attr.key_size = key_size; +- map_attr.value_size = value_size; +- map_attr.max_entries = max_entries; ++ opts.map_flags = map_flags; + if (node >= 0) { +- map_attr.numa_node = node; +- map_attr.map_flags |= BPF_F_NUMA_NODE; ++ opts.numa_node = node; ++ opts.map_flags |= BPF_F_NUMA_NODE; + } + +- return bpf_create_map_xattr(&map_attr); ++ return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); + } + + int bpf_create_map(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries, __u32 map_flags) + { +- struct bpf_create_map_attr map_attr = {}; +- +- map_attr.map_type = map_type; +- map_attr.map_flags = map_flags; +- map_attr.key_size = key_size; +- map_attr.value_size = value_size; +- map_attr.max_entries = max_entries; ++ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); + +- return bpf_create_map_xattr(&map_attr); ++ return bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts); + } + + int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags) + { +- struct bpf_create_map_attr map_attr = {}; ++ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); + +- map_attr.name = name; +- map_attr.map_type = map_type; +- map_attr.map_flags = map_flags; +- map_attr.key_size = key_size; +- map_attr.value_size = value_size; +- map_attr.max_entries = max_entries; +- +- return bpf_create_map_xattr(&map_attr); ++ return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); + } + + int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags, int node) + { +- union bpf_attr attr; +- int fd; +- +- memset(&attr, '\0', sizeof(attr)); +- +- attr.map_type = map_type; +- attr.key_size = key_size; +- attr.value_size = 4; +- attr.inner_map_fd = inner_map_fd; +- attr.max_entries = max_entries; +- attr.map_flags = map_flags; +- if (name) +- memcpy(attr.map_name, name, +- min(strlen(name), BPF_OBJ_NAME_LEN - 1)); ++ LIBBPF_OPTS(bpf_map_create_opts, opts); + ++ opts.inner_map_fd = inner_map_fd; ++ opts.map_flags = map_flags; + if (node >= 0) { +- attr.map_flags |= BPF_F_NUMA_NODE; +- attr.numa_node = node; ++ opts.map_flags |= BPF_F_NUMA_NODE; ++ opts.numa_node = node; + } + +- fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr)); +- return libbpf_err_errno(fd); ++ return 
bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); + } + + int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags) + { +- return bpf_create_map_in_map_node(map_type, name, key_size, +- inner_map_fd, max_entries, map_flags, +- -1); ++ LIBBPF_OPTS(bpf_map_create_opts, opts, ++ .inner_map_fd = inner_map_fd, ++ .map_flags = map_flags, ++ ); ++ ++ return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); + } + + static void * +--- a/tools/lib/bpf/bpf.h ++++ b/tools/lib/bpf/bpf.h +@@ -35,6 +35,30 @@ + extern "C" { + #endif + ++struct bpf_map_create_opts { ++ size_t sz; /* size of this struct for forward/backward compatibility */ ++ ++ __u32 btf_fd; ++ __u32 btf_key_type_id; ++ __u32 btf_value_type_id; ++ __u32 btf_vmlinux_value_type_id; ++ ++ int inner_map_fd; ++ int map_flags; ++ __u64 map_extra; ++ ++ int numa_node; ++ int map_ifindex; ++}; ++#define bpf_map_create_opts__last_field map_ifindex ++ ++LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, ++ const char *map_name, ++ __u32 key_size, ++ __u32 value_size, ++ __u32 max_entries, ++ const struct bpf_map_create_opts *opts); ++ + struct bpf_create_map_attr { + const char *name; + enum bpf_map_type map_type; +@@ -53,20 +77,25 @@ struct bpf_create_map_attr { + }; + }; + +-LIBBPF_API int +-bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); ++LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") ++LIBBPF_API int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); ++LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") + LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, + int max_entries, __u32 map_flags, int node); ++LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") + LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, + int max_entries, __u32 map_flags); ++LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") + LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries, __u32 map_flags); ++LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") + LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type, + const char *name, int key_size, + int inner_map_fd, int max_entries, + __u32 map_flags, int node); ++LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") + LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type, + const char *name, int key_size, + int inner_map_fd, int max_entries, +--- a/tools/lib/bpf/bpf_gen_internal.h ++++ b/tools/lib/bpf/bpf_gen_internal.h +@@ -51,7 +51,10 @@ void bpf_gen__init(struct bpf_gen *gen, + int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps); + void bpf_gen__free(struct bpf_gen *gen); + void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size); +-void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_params *map_attr, int map_idx); ++void bpf_gen__map_create(struct bpf_gen *gen, ++ enum bpf_map_type map_type, const char *map_name, ++ __u32 key_size, __u32 value_size, __u32 max_entries, ++ struct bpf_map_create_opts *map_attr, int map_idx); + void bpf_gen__prog_load(struct bpf_gen *gen, + enum bpf_prog_type prog_type, const char *prog_name, + const char *license, struct bpf_insn *insns, size_t insn_cnt, +--- a/tools/lib/bpf/gen_loader.c ++++ b/tools/lib/bpf/gen_loader.c 
+@@ -445,47 +445,33 @@ void bpf_gen__load_btf(struct bpf_gen *g + } + + void bpf_gen__map_create(struct bpf_gen *gen, +- struct bpf_create_map_params *map_attr, int map_idx) ++ enum bpf_map_type map_type, ++ const char *map_name, ++ __u32 key_size, __u32 value_size, __u32 max_entries, ++ struct bpf_map_create_opts *map_attr, int map_idx) + { +- int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id); ++ int attr_size = offsetofend(union bpf_attr, map_extra); + bool close_inner_map_fd = false; + int map_create_attr, idx; + union bpf_attr attr; + + memset(&attr, 0, attr_size); +- attr.map_type = map_attr->map_type; +- attr.key_size = map_attr->key_size; +- attr.value_size = map_attr->value_size; ++ attr.map_type = map_type; ++ attr.key_size = key_size; ++ attr.value_size = value_size; + attr.map_flags = map_attr->map_flags; + attr.map_extra = map_attr->map_extra; +- memcpy(attr.map_name, map_attr->name, +- min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1)); ++ if (map_name) ++ memcpy(attr.map_name, map_name, ++ min((unsigned)strlen(map_name), BPF_OBJ_NAME_LEN - 1)); + attr.numa_node = map_attr->numa_node; + attr.map_ifindex = map_attr->map_ifindex; +- attr.max_entries = map_attr->max_entries; +- switch (attr.map_type) { +- case BPF_MAP_TYPE_PERF_EVENT_ARRAY: +- case BPF_MAP_TYPE_CGROUP_ARRAY: +- case BPF_MAP_TYPE_STACK_TRACE: +- case BPF_MAP_TYPE_ARRAY_OF_MAPS: +- case BPF_MAP_TYPE_HASH_OF_MAPS: +- case BPF_MAP_TYPE_DEVMAP: +- case BPF_MAP_TYPE_DEVMAP_HASH: +- case BPF_MAP_TYPE_CPUMAP: +- case BPF_MAP_TYPE_XSKMAP: +- case BPF_MAP_TYPE_SOCKMAP: +- case BPF_MAP_TYPE_SOCKHASH: +- case BPF_MAP_TYPE_QUEUE: +- case BPF_MAP_TYPE_STACK: +- case BPF_MAP_TYPE_RINGBUF: +- break; +- default: +- attr.btf_key_type_id = map_attr->btf_key_type_id; +- attr.btf_value_type_id = map_attr->btf_value_type_id; +- } ++ attr.max_entries = max_entries; ++ attr.btf_key_type_id = map_attr->btf_key_type_id; ++ attr.btf_value_type_id = map_attr->btf_value_type_id; + + pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n", +- attr.map_name, map_idx, map_attr->map_type, attr.btf_value_type_id); ++ attr.map_name, map_idx, map_type, attr.btf_value_type_id); + + map_create_attr = add_data(gen, &attr, attr_size); + if (attr.btf_value_type_id) +@@ -512,7 +498,7 @@ void bpf_gen__map_create(struct bpf_gen + /* emit MAP_CREATE command */ + emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size); + debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d", +- attr.map_name, map_idx, map_attr->map_type, attr.value_size, ++ attr.map_name, map_idx, map_type, value_size, + attr.btf_value_type_id); + emit_check_err(gen); + /* remember map_fd in the stack, if successful */ +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -4839,19 +4839,16 @@ static void bpf_map__destroy(struct bpf_ + + static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) + { +- struct bpf_create_map_params create_attr; ++ LIBBPF_OPTS(bpf_map_create_opts, create_attr); + struct bpf_map_def *def = &map->def; ++ const char *map_name = NULL; ++ __u32 max_entries; + int err = 0; + +- memset(&create_attr, 0, sizeof(create_attr)); +- + if (kernel_supports(obj, FEAT_PROG_NAME)) +- create_attr.name = map->name; ++ map_name = map->name; + create_attr.map_ifindex = map->map_ifindex; +- create_attr.map_type = def->type; + create_attr.map_flags = def->map_flags; +- create_attr.key_size = def->key_size; +- create_attr.value_size = def->value_size; + create_attr.numa_node 
= map->numa_node; + create_attr.map_extra = map->map_extra; + +@@ -4865,18 +4862,14 @@ static int bpf_object__create_map(struct + return nr_cpus; + } + pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); +- create_attr.max_entries = nr_cpus; ++ max_entries = nr_cpus; + } else { +- create_attr.max_entries = def->max_entries; ++ max_entries = def->max_entries; + } + + if (bpf_map__is_struct_ops(map)) +- create_attr.btf_vmlinux_value_type_id = +- map->btf_vmlinux_value_type_id; ++ create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; + +- create_attr.btf_fd = 0; +- create_attr.btf_key_type_id = 0; +- create_attr.btf_value_type_id = 0; + if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) { + create_attr.btf_fd = btf__fd(obj->btf); + create_attr.btf_key_type_id = map->btf_key_type_id; +@@ -4922,13 +4915,17 @@ static int bpf_object__create_map(struct + } + + if (obj->gen_loader) { +- bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps); ++ bpf_gen__map_create(obj->gen_loader, def->type, map_name, ++ def->key_size, def->value_size, max_entries, ++ &create_attr, is_inner ? -1 : map - obj->maps); + /* Pretend to have valid FD to pass various fd >= 0 checks. + * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. + */ + map->fd = 0; + } else { +- map->fd = libbpf__bpf_create_map_xattr(&create_attr); ++ map->fd = bpf_map_create(def->type, map_name, ++ def->key_size, def->value_size, ++ max_entries, &create_attr); + } + if (map->fd < 0 && (create_attr.btf_key_type_id || + create_attr.btf_value_type_id)) { +@@ -4943,7 +4940,9 @@ static int bpf_object__create_map(struct + create_attr.btf_value_type_id = 0; + map->btf_key_type_id = 0; + map->btf_value_type_id = 0; +- map->fd = libbpf__bpf_create_map_xattr(&create_attr); ++ map->fd = bpf_map_create(def->type, map_name, ++ def->key_size, def->value_size, ++ max_entries, &create_attr); + } + + err = map->fd < 0 ? 
-errno : 0; +--- a/tools/lib/bpf/libbpf.map ++++ b/tools/lib/bpf/libbpf.map +@@ -391,6 +391,7 @@ LIBBPF_0.6.0 { + global: + bpf_map__map_extra; + bpf_map__set_map_extra; ++ bpf_map_create; + bpf_object__next_map; + bpf_object__next_program; + bpf_object__prev_map; +--- a/tools/lib/bpf/libbpf_internal.h ++++ b/tools/lib/bpf/libbpf_internal.h +@@ -278,27 +278,6 @@ int parse_cpu_mask_file(const char *fcpu + int libbpf__load_raw_btf(const char *raw_types, size_t types_len, + const char *str_sec, size_t str_len); + +-struct bpf_create_map_params { +- const char *name; +- enum bpf_map_type map_type; +- __u32 map_flags; +- __u32 key_size; +- __u32 value_size; +- __u32 max_entries; +- __u32 numa_node; +- __u32 btf_fd; +- __u32 btf_key_type_id; +- __u32 btf_value_type_id; +- __u32 map_ifindex; +- union { +- __u32 inner_map_fd; +- __u32 btf_vmlinux_value_type_id; +- }; +- __u64 map_extra; +-}; +- +-int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr); +- + struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); + void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, + const char **prefix, int *kind); diff --git a/patches.suse/libbpf-Use-100-character-limit-to-make-bpf_tracing.h.patch b/patches.suse/libbpf-Use-100-character-limit-to-make-bpf_tracing.h.patch new file mode 100644 index 0000000..a1ed1b2 --- /dev/null +++ b/patches.suse/libbpf-Use-100-character-limit-to-make-bpf_tracing.h.patch @@ -0,0 +1,102 @@ +From: Andrii Nakryiko +Date: Wed, 22 Dec 2021 13:39:24 -0800 +Subject: libbpf: Use 100-character limit to make bpf_tracing.h easier to read +Patch-mainline: v5.17-rc1 +Git-commit: f60edf5b53848f2cf53e7e4b716ed8e45563bb12 +References: jsc#PED-1368 + +Improve bpf_tracing.h's macro definition readability by keeping them +single-line and better aligned. This makes it easier to follow all those +variadic patterns. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20211222213924.1869758-2-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf_tracing.h | 54 +++++++++++++++++--------------------------- + 1 file changed, 22 insertions(+), 32 deletions(-) + +--- a/tools/lib/bpf/bpf_tracing.h ++++ b/tools/lib/bpf/bpf_tracing.h +@@ -302,25 +302,23 @@ struct pt_regs; + #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N + #endif + #ifndef ___bpf_narg +-#define ___bpf_narg(...) \ +- ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) ++#define ___bpf_narg(...) ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + #endif + +-#define ___bpf_ctx_cast0() ctx +-#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] +-#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] +-#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] +-#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] +-#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] +-#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] +-#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] +-#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] +-#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] ++#define ___bpf_ctx_cast0() ctx ++#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] ++#define ___bpf_ctx_cast2(x, args...) 
___bpf_ctx_cast1(args), (void *)ctx[1] ++#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] ++#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] ++#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] ++#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] ++#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] ++#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] ++#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] + #define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] + #define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] + #define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] +-#define ___bpf_ctx_cast(args...) \ +- ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) ++#define ___bpf_ctx_cast(args...) ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) + + /* + * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and +@@ -353,19 +351,13 @@ ____##name(unsigned long long *ctx, ##ar + + struct pt_regs; + +-#define ___bpf_kprobe_args0() ctx +-#define ___bpf_kprobe_args1(x) \ +- ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) +-#define ___bpf_kprobe_args2(x, args...) \ +- ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) +-#define ___bpf_kprobe_args3(x, args...) \ +- ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) +-#define ___bpf_kprobe_args4(x, args...) \ +- ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) +-#define ___bpf_kprobe_args5(x, args...) \ +- ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) +-#define ___bpf_kprobe_args(args...) \ +- ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) ++#define ___bpf_kprobe_args0() ctx ++#define ___bpf_kprobe_args1(x) ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) ++#define ___bpf_kprobe_args2(x, args...) ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) ++#define ___bpf_kprobe_args3(x, args...) ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) ++#define ___bpf_kprobe_args4(x, args...) ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) ++#define ___bpf_kprobe_args5(x, args...) ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) ++#define ___bpf_kprobe_args(args...) ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) + + /* + * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for +@@ -391,11 +383,9 @@ typeof(name(0)) name(struct pt_regs *ctx + static __attribute__((always_inline)) typeof(name(0)) \ + ____##name(struct pt_regs *ctx, ##args) + +-#define ___bpf_kretprobe_args0() ctx +-#define ___bpf_kretprobe_args1(x) \ +- ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) +-#define ___bpf_kretprobe_args(args...) \ +- ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) ++#define ___bpf_kretprobe_args0() ctx ++#define ___bpf_kretprobe_args1(x) ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) ++#define ___bpf_kretprobe_args(args...) 
___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) + + /* + * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional diff --git a/patches.suse/libbpf-Use-CO-RE-in-the-kernel-in-light-skeleton.patch b/patches.suse/libbpf-Use-CO-RE-in-the-kernel-in-light-skeleton.patch new file mode 100644 index 0000000..374e7bb --- /dev/null +++ b/patches.suse/libbpf-Use-CO-RE-in-the-kernel-in-light-skeleton.patch @@ -0,0 +1,331 @@ +From: Alexei Starovoitov +Date: Wed, 1 Dec 2021 10:10:32 -0800 +Subject: libbpf: Use CO-RE in the kernel in light skeleton. +Patch-mainline: v5.17-rc1 +Git-commit: d0e928876e30b18411b80fd2445424bc00e95745 +References: jsc#PED-1368 + +Without lskel the CO-RE relocations are processed by libbpf before any other +work is done. Instead, when lskel is needed, remember relocation as RELO_CORE +kind. Then when loader prog is generated for a given bpf program pass CO-RE +relos of that program to gen loader via bpf_gen__record_relo_core(). The gen +loader will remember them as-is and pass it later as-is into the kernel. + +The normal libbpf flow is to process CO-RE early before call relos happen. In +case of gen_loader the core relos have to be added to other relos to be copied +together when bpf static function is appended in different places to other main +bpf progs. During the copy the append_subprog_relos() will adjust insn_idx for +normal relos and for RELO_CORE kind too. When that is done each struct +reloc_desc has good relos for specific main prog. + +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-10-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf_gen_internal.h | 3 + + tools/lib/bpf/gen_loader.c | 41 +++++++++++++- + tools/lib/bpf/libbpf.c | 109 ++++++++++++++++++++++++++++----------- + 3 files changed, 120 insertions(+), 33 deletions(-) + +--- a/tools/lib/bpf/bpf_gen_internal.h ++++ b/tools/lib/bpf/bpf_gen_internal.h +@@ -39,6 +39,8 @@ struct bpf_gen { + int error; + struct ksym_relo_desc *relos; + int relo_cnt; ++ struct bpf_core_relo *core_relos; ++ int core_relo_cnt; + char attach_target[128]; + int attach_kind; + struct ksym_desc *ksyms; +@@ -64,5 +66,6 @@ void bpf_gen__map_freeze(struct bpf_gen + void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type); + void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, + bool is_typeless, int kind, int insn_idx); ++void bpf_gen__record_relo_core(struct bpf_gen *gen, const struct bpf_core_relo *core_relo); + + #endif +--- a/tools/lib/bpf/gen_loader.c ++++ b/tools/lib/bpf/gen_loader.c +@@ -842,6 +842,22 @@ clear_src_reg: + emit_ksym_relo_log(gen, relo, kdesc->ref); + } + ++void bpf_gen__record_relo_core(struct bpf_gen *gen, ++ const struct bpf_core_relo *core_relo) ++{ ++ struct bpf_core_relo *relos; ++ ++ relos = libbpf_reallocarray(gen->core_relos, gen->core_relo_cnt + 1, sizeof(*relos)); ++ if (!relos) { ++ gen->error = -ENOMEM; ++ return; ++ } ++ gen->core_relos = relos; ++ relos += gen->core_relo_cnt; ++ memcpy(relos, core_relo, sizeof(*relos)); ++ gen->core_relo_cnt++; ++} ++ + static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) + { + int insn; +@@ -874,6 +890,15 @@ static void emit_relos(struct bpf_gen *g + emit_relo(gen, gen->relos + i, insns); + } + ++static void cleanup_core_relo(struct bpf_gen *gen) ++{ ++ if (!gen->core_relo_cnt) ++ return; ++ free(gen->core_relos); 
++ gen->core_relo_cnt = 0; ++ gen->core_relos = NULL; ++} ++ + static void cleanup_relos(struct bpf_gen *gen, int insns) + { + int i, insn; +@@ -901,6 +926,7 @@ static void cleanup_relos(struct bpf_gen + gen->relo_cnt = 0; + gen->relos = NULL; + } ++ cleanup_core_relo(gen); + } + + void bpf_gen__prog_load(struct bpf_gen *gen, +@@ -908,12 +934,13 @@ void bpf_gen__prog_load(struct bpf_gen * + const char *license, struct bpf_insn *insns, size_t insn_cnt, + struct bpf_prog_load_opts *load_attr, int prog_idx) + { +- int attr_size = offsetofend(union bpf_attr, fd_array); +- int prog_load_attr, license_off, insns_off, func_info, line_info; ++ int prog_load_attr, license_off, insns_off, func_info, line_info, core_relos; ++ int attr_size = offsetofend(union bpf_attr, core_relo_rec_size); + union bpf_attr attr; + + memset(&attr, 0, attr_size); +- pr_debug("gen: prog_load: type %d insns_cnt %zd\n", prog_type, insn_cnt); ++ pr_debug("gen: prog_load: type %d insns_cnt %zd progi_idx %d\n", ++ prog_type, insn_cnt, prog_idx); + /* add license string to blob of bytes */ + license_off = add_data(gen, license, strlen(license) + 1); + /* add insns to blob of bytes */ +@@ -937,6 +964,11 @@ void bpf_gen__prog_load(struct bpf_gen * + line_info = add_data(gen, load_attr->line_info, + attr.line_info_cnt * attr.line_info_rec_size); + ++ attr.core_relo_rec_size = sizeof(struct bpf_core_relo); ++ attr.core_relo_cnt = gen->core_relo_cnt; ++ core_relos = add_data(gen, gen->core_relos, ++ attr.core_relo_cnt * attr.core_relo_rec_size); ++ + memcpy(attr.prog_name, prog_name, + min((unsigned)strlen(prog_name), BPF_OBJ_NAME_LEN - 1)); + prog_load_attr = add_data(gen, &attr, attr_size); +@@ -953,6 +985,9 @@ void bpf_gen__prog_load(struct bpf_gen * + /* populate union bpf_attr with a pointer to line_info */ + emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info); + ++ /* populate union bpf_attr with a pointer to core_relos */ ++ emit_rel_store(gen, attr_field(prog_load_attr, core_relos), core_relos); ++ + /* populate union bpf_attr fd_array with a pointer to data where map_fds are saved */ + emit_rel_store(gen, attr_field(prog_load_attr, fd_array), gen->fd_array); + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -230,13 +230,19 @@ enum reloc_type { + RELO_EXTERN_VAR, + RELO_EXTERN_FUNC, + RELO_SUBPROG_ADDR, ++ RELO_CORE, + }; + + struct reloc_desc { + enum reloc_type type; + int insn_idx; +- int map_idx; +- int sym_off; ++ union { ++ const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */ ++ struct { ++ int map_idx; ++ int sym_off; ++ }; ++ }; + }; + + struct bpf_sec_def; +@@ -5485,6 +5491,24 @@ static void *u32_as_hash_key(__u32 x) + return (void *)(uintptr_t)x; + } + ++static int record_relo_core(struct bpf_program *prog, ++ const struct bpf_core_relo *core_relo, int insn_idx) ++{ ++ struct reloc_desc *relos, *relo; ++ ++ relos = libbpf_reallocarray(prog->reloc_desc, ++ prog->nr_reloc + 1, sizeof(*relos)); ++ if (!relos) ++ return -ENOMEM; ++ relo = &relos[prog->nr_reloc]; ++ relo->type = RELO_CORE; ++ relo->insn_idx = insn_idx; ++ relo->core_relo = core_relo; ++ prog->reloc_desc = relos; ++ prog->nr_reloc++; ++ return 0; ++} ++ + static int bpf_core_apply_relo(struct bpf_program *prog, + const struct bpf_core_relo *relo, + int relo_idx, +@@ -5521,10 +5545,12 @@ static int bpf_core_apply_relo(struct bp + return -EINVAL; + + if (prog->obj->gen_loader) { +- pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n", ++ const char *spec_str = btf__name_by_offset(local_btf, 
relo->access_str_off); ++ ++ pr_debug("record_relo_core: prog %td insn[%d] %s %s %s final insn_idx %d\n", + prog - prog->obj->programs, relo->insn_off / 8, +- local_name, relo->kind); +- return -ENOTSUP; ++ btf_kind_str(local_type), local_name, spec_str, insn_idx); ++ return record_relo_core(prog, relo, insn_idx); + } + + if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && +@@ -5729,6 +5755,9 @@ bpf_object__relocate_data(struct bpf_obj + case RELO_CALL: + /* handled already */ + break; ++ case RELO_CORE: ++ /* will be handled by bpf_program_record_relos() */ ++ break; + default: + pr_warn("prog '%s': relo #%d: bad relo type %d\n", + prog->name, i, relo->type); +@@ -6169,6 +6198,35 @@ bpf_object__free_relocs(struct bpf_objec + } + } + ++static int cmp_relocs(const void *_a, const void *_b) ++{ ++ const struct reloc_desc *a = _a; ++ const struct reloc_desc *b = _b; ++ ++ if (a->insn_idx != b->insn_idx) ++ return a->insn_idx < b->insn_idx ? -1 : 1; ++ ++ /* no two relocations should have the same insn_idx, but ... */ ++ if (a->type != b->type) ++ return a->type < b->type ? -1 : 1; ++ ++ return 0; ++} ++ ++static void bpf_object__sort_relos(struct bpf_object *obj) ++{ ++ int i; ++ ++ for (i = 0; i < obj->nr_programs; i++) { ++ struct bpf_program *p = &obj->programs[i]; ++ ++ if (!p->nr_reloc) ++ continue; ++ ++ qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); ++ } ++} ++ + static int + bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) + { +@@ -6183,6 +6241,8 @@ bpf_object__relocate(struct bpf_object * + err); + return err; + } ++ if (obj->gen_loader) ++ bpf_object__sort_relos(obj); + } + + /* Before relocating calls pre-process relocations and mark +@@ -6387,21 +6447,6 @@ static int bpf_object__collect_map_relos + return 0; + } + +-static int cmp_relocs(const void *_a, const void *_b) +-{ +- const struct reloc_desc *a = _a; +- const struct reloc_desc *b = _b; +- +- if (a->insn_idx != b->insn_idx) +- return a->insn_idx < b->insn_idx ? -1 : 1; +- +- /* no two relocations should have the same insn_idx, but ... */ +- if (a->type != b->type) +- return a->type < b->type ? 
-1 : 1; +- +- return 0; +-} +- + static int bpf_object__collect_relos(struct bpf_object *obj) + { + int i, err; +@@ -6434,14 +6479,7 @@ static int bpf_object__collect_relos(str + return err; + } + +- for (i = 0; i < obj->nr_programs; i++) { +- struct bpf_program *p = &obj->programs[i]; +- +- if (!p->nr_reloc) +- continue; +- +- qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); +- } ++ bpf_object__sort_relos(obj); + return 0; + } + +@@ -6683,7 +6721,7 @@ out: + return ret; + } + +-static int bpf_program__record_externs(struct bpf_program *prog) ++static int bpf_program_record_relos(struct bpf_program *prog) + { + struct bpf_object *obj = prog->obj; + int i; +@@ -6705,6 +6743,17 @@ static int bpf_program__record_externs(s + ext->is_weak, false, BTF_KIND_FUNC, + relo->insn_idx); + break; ++ case RELO_CORE: { ++ struct bpf_core_relo cr = { ++ .insn_off = relo->insn_idx * 8, ++ .type_id = relo->core_relo->type_id, ++ .access_str_off = relo->core_relo->access_str_off, ++ .kind = relo->core_relo->kind, ++ }; ++ ++ bpf_gen__record_relo_core(obj->gen_loader, &cr); ++ break; ++ } + default: + continue; + } +@@ -6744,7 +6793,7 @@ static int bpf_object_load_prog(struct b + prog->name, prog->instances.nr); + } + if (obj->gen_loader) +- bpf_program__record_externs(prog); ++ bpf_program_record_relos(prog); + err = bpf_object_load_prog_instance(obj, prog, + prog->insns, prog->insns_cnt, + license, kern_ver, &fd); diff --git a/patches.suse/libbpf-Use-__u32-fields-in-bpf_map_create_opts.patch b/patches.suse/libbpf-Use-__u32-fields-in-bpf_map_create_opts.patch new file mode 100644 index 0000000..1f25874 --- /dev/null +++ b/patches.suse/libbpf-Use-__u32-fields-in-bpf_map_create_opts.patch @@ -0,0 +1,38 @@ +From: Andrii Nakryiko +Date: Wed, 1 Dec 2021 15:28:16 -0800 +Subject: libbpf: Use __u32 fields in bpf_map_create_opts +Patch-mainline: v5.17-rc1 +Git-commit: 74d9807023573ba2d82ec3f505f6aa0c7076918c +References: jsc#PED-1368 + +Corresponding Linux UAPI struct uses __u32, not int, so keep it +consistent. + +Fixes: 992c4225419a ("libbpf: Unify low-level map creation APIs w/ new bpf_map_create()") +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211201232824.3166325-2-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/bpf.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/tools/lib/bpf/bpf.h ++++ b/tools/lib/bpf/bpf.h +@@ -43,12 +43,12 @@ struct bpf_map_create_opts { + __u32 btf_value_type_id; + __u32 btf_vmlinux_value_type_id; + +- int inner_map_fd; +- int map_flags; ++ __u32 inner_map_fd; ++ __u32 map_flags; + __u64 map_extra; + +- int numa_node; +- int map_ifindex; ++ __u32 numa_node; ++ __u32 map_ifindex; + }; + #define bpf_map_create_opts__last_field map_ifindex + diff --git a/patches.suse/libbpf-Use-bpf_map_create-consistently-internally.patch b/patches.suse/libbpf-Use-bpf_map_create-consistently-internally.patch new file mode 100644 index 0000000..1f4fd4d --- /dev/null +++ b/patches.suse/libbpf-Use-bpf_map_create-consistently-internally.patch @@ -0,0 +1,196 @@ +From: Andrii Nakryiko +Date: Wed, 24 Nov 2021 11:32:31 -0800 +Subject: libbpf: Use bpf_map_create() consistently internally +Patch-mainline: v5.17-rc1 +Git-commit: a9606f405f2c8f24751b0a7326655a657a63ad60 +References: jsc#PED-1368 + +Remove all the remaining uses of to-be-deprecated bpf_create_map*() APIs. 
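+
+For illustration, the conversion this patch applies at each call site
+follows the pattern below (the map type, key/value sizes and entry
+count are made-up example values, not taken from any specific caller):
+
+	/* old, to-be-deprecated API */
+	fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), 32, 1, 0);
+
+	/* new unified API; opts may be NULL when no extra attributes
+	 * are needed, or a LIBBPF_OPTS(bpf_map_create_opts, ...) var
+	 */
+	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32,
+			    1, NULL);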
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211124193233.3115996-3-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 30 ++++++------------------------ + tools/lib/bpf/libbpf_probes.c | 30 +++++++++++++++--------------- + tools/lib/bpf/skel_internal.h | 3 +-- + tools/lib/bpf/xsk.c | 13 +++---------- + 4 files changed, 25 insertions(+), 51 deletions(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -4361,7 +4361,6 @@ static int probe_kern_prog_name(void) + + static int probe_kern_global_data(void) + { +- struct bpf_create_map_attr map_attr; + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), +@@ -4371,13 +4370,7 @@ static int probe_kern_global_data(void) + }; + int ret, map, insn_cnt = ARRAY_SIZE(insns); + +- memset(&map_attr, 0, sizeof(map_attr)); +- map_attr.map_type = BPF_MAP_TYPE_ARRAY; +- map_attr.key_size = sizeof(int); +- map_attr.value_size = 32; +- map_attr.max_entries = 1; +- +- map = bpf_create_map_xattr(&map_attr); ++ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); +@@ -4507,15 +4500,11 @@ static int probe_kern_btf_type_tag(void) + + static int probe_kern_array_mmap(void) + { +- struct bpf_create_map_attr attr = { +- .map_type = BPF_MAP_TYPE_ARRAY, +- .map_flags = BPF_F_MMAPABLE, +- .key_size = sizeof(int), +- .value_size = sizeof(int), +- .max_entries = 1, +- }; ++ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); ++ int fd; + +- return probe_fd(bpf_create_map_xattr(&attr)); ++ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts); ++ return probe_fd(fd); + } + + static int probe_kern_exp_attach_type(void) +@@ -4554,7 +4543,6 @@ static int probe_kern_probe_read_kernel( + + static int probe_prog_bind_map(void) + { +- struct bpf_create_map_attr map_attr; + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), +@@ -4562,13 +4550,7 @@ static int probe_prog_bind_map(void) + }; + int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); + +- memset(&map_attr, 0, sizeof(map_attr)); +- map_attr.map_type = BPF_MAP_TYPE_ARRAY; +- map_attr.key_size = sizeof(int); +- map_attr.value_size = 32; +- map_attr.max_entries = 1; +- +- map = bpf_create_map_xattr(&map_attr); ++ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); +--- a/tools/lib/bpf/libbpf_probes.c ++++ b/tools/lib/bpf/libbpf_probes.c +@@ -201,7 +201,6 @@ bool bpf_probe_map_type(enum bpf_map_typ + { + int key_size, value_size, max_entries, map_flags; + __u32 btf_key_type_id = 0, btf_value_type_id = 0; +- struct bpf_create_map_attr attr = {}; + int fd = -1, btf_fd = -1, fd_inner; + + key_size = sizeof(__u32); +@@ -271,34 +270,35 @@ bool bpf_probe_map_type(enum bpf_map_typ + + if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { ++ LIBBPF_OPTS(bpf_map_create_opts, opts); ++ + /* TODO: probe for device, once libbpf has a function to create + * map-in-map for offload + */ + if (ifindex) + return false; + +- fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH, +- sizeof(__u32), sizeof(__u32), 1, 0); ++ fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, ++ sizeof(__u32), sizeof(__u32), 1, NULL); + if (fd_inner < 0) + return false; +- fd = 
bpf_create_map_in_map(map_type, NULL, sizeof(__u32), +- fd_inner, 1, 0); ++ ++ opts.inner_map_fd = fd_inner; ++ fd = bpf_map_create(map_type, NULL, sizeof(__u32), sizeof(__u32), 1, &opts); + close(fd_inner); + } else { ++ LIBBPF_OPTS(bpf_map_create_opts, opts); ++ + /* Note: No other restriction on map type probes for offload */ +- attr.map_type = map_type; +- attr.key_size = key_size; +- attr.value_size = value_size; +- attr.max_entries = max_entries; +- attr.map_flags = map_flags; +- attr.map_ifindex = ifindex; ++ opts.map_flags = map_flags; ++ opts.map_ifindex = ifindex; + if (btf_fd >= 0) { +- attr.btf_fd = btf_fd; +- attr.btf_key_type_id = btf_key_type_id; +- attr.btf_value_type_id = btf_value_type_id; ++ opts.btf_fd = btf_fd; ++ opts.btf_key_type_id = btf_key_type_id; ++ opts.btf_value_type_id = btf_value_type_id; + } + +- fd = bpf_create_map_xattr(&attr); ++ fd = bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts); + } + if (fd >= 0) + close(fd); +--- a/tools/lib/bpf/skel_internal.h ++++ b/tools/lib/bpf/skel_internal.h +@@ -65,8 +65,7 @@ static inline int bpf_load_and_run(struc + int map_fd = -1, prog_fd = -1, key = 0, err; + union bpf_attr attr; + +- map_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, +- opts->data_sz, 1, 0); ++ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1, NULL); + if (map_fd < 0) { + opts->errstr = "failed to create loader map"; + err = -errno; +--- a/tools/lib/bpf/xsk.c ++++ b/tools/lib/bpf/xsk.c +@@ -364,7 +364,6 @@ int xsk_umem__create_v0_0_2(struct xsk_u + static enum xsk_prog get_xsk_prog(void) + { + enum xsk_prog detected = XSK_PROG_FALLBACK; +- struct bpf_create_map_attr map_attr; + __u32 size_out, retval, duration; + char data_in = 0, data_out; + struct bpf_insn insns[] = { +@@ -376,13 +375,7 @@ static enum xsk_prog get_xsk_prog(void) + }; + int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns); + +- memset(&map_attr, 0, sizeof(map_attr)); +- map_attr.map_type = BPF_MAP_TYPE_XSKMAP; +- map_attr.key_size = sizeof(int); +- map_attr.value_size = sizeof(int); +- map_attr.max_entries = 1; +- +- map_fd = bpf_create_map_xattr(&map_attr); ++ map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL); + if (map_fd < 0) + return detected; + +@@ -586,8 +579,8 @@ static int xsk_create_bpf_maps(struct xs + if (max_queues < 0) + return max_queues; + +- fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map", +- sizeof(int), sizeof(int), max_queues, 0); ++ fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map", ++ sizeof(int), sizeof(int), max_queues, NULL); + if (fd < 0) + return fd; + diff --git a/patches.suse/libbpf-Use-probe_name-for-legacy-kprobe.patch b/patches.suse/libbpf-Use-probe_name-for-legacy-kprobe.patch new file mode 100644 index 0000000..0cc57cc --- /dev/null +++ b/patches.suse/libbpf-Use-probe_name-for-legacy-kprobe.patch @@ -0,0 +1,34 @@ +From: Qiang Wang +Date: Mon, 27 Dec 2021 21:07:12 +0800 +Subject: libbpf: Use probe_name for legacy kprobe +Patch-mainline: v5.17-rc1 +Git-commit: 71cff670baff5cc6a6eeb0181e2cc55579c5e1e0 +References: jsc#PED-1368 + +Fix a bug in commit 46ed5fc33db9, which wrongly used the +func_name instead of probe_name to register legacy kprobe. 
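+
+Schematically (the function names below are the ones used in libbpf's
+legacy-kprobe path; the generated string is only an example):
+
+	char probe_name[256];
+
+	/* probe_name ends up as something like "libbpf_1234_my_func_0x10",
+	 * i.e. distinct from func_name once PID and offset are mixed in */
+	gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
+				     func_name, offset);
+	/* the generated name must be remembered, not plain func_name,
+	 * otherwise the wrong tracefs event is torn down on detach */
+	legacy_probe = strdup(probe_name);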
+ +Fixes: 46ed5fc33db9 ("libbpf: Refactor and simplify legacy kprobe code") +Co-developed-by: Chengming Zhou +Signed-off-by: Qiang Wang +Signed-off-by: Chengming Zhou +Signed-off-by: Andrii Nakryiko +Tested-by: Hengqi Chen +Reviewed-by: Hengqi Chen +Link: https://lore.kernel.org/bpf/20211227130713.66933-1-wangqiang.wq.frank@bytedance.com +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -10015,7 +10015,7 @@ bpf_program__attach_kprobe_opts(const st + gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), + func_name, offset); + +- legacy_probe = strdup(func_name); ++ legacy_probe = strdup(probe_name); + if (!legacy_probe) + return libbpf_err_ptr(-ENOMEM); + diff --git a/patches.suse/libbpf-Validate-that-.BTF-and-.BTF.ext-sections-cont.patch b/patches.suse/libbpf-Validate-that-.BTF-and-.BTF.ext-sections-cont.patch new file mode 100644 index 0000000..a13a459 --- /dev/null +++ b/patches.suse/libbpf-Validate-that-.BTF-and-.BTF.ext-sections-cont.patch @@ -0,0 +1,36 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 10:32:11 -0700 +Subject: libbpf: Validate that .BTF and .BTF.ext sections contain data +Patch-mainline: v5.17-rc1 +Git-commit: 62554d52e71797eefa3fc15b54008038837bb2d4 +References: jsc#PED-1368 + +.BTF and .BTF.ext ELF sections should have SHT_PROGBITS type and contain +data. If they are not, ELF is invalid or corrupted, so bail out. +Otherwise this can lead to data->d_buf being NULL and SIGSEGV later on. +Reported by oss-fuzz project. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20211103173213.1376990-4-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/libbpf.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/tools/lib/bpf/libbpf.c ++++ b/tools/lib/bpf/libbpf.c +@@ -3270,8 +3270,12 @@ static int bpf_object__elf_collect(struc + } else if (strcmp(name, MAPS_ELF_SEC) == 0) { + obj->efile.btf_maps_shndx = idx; + } else if (strcmp(name, BTF_ELF_SEC) == 0) { ++ if (sh->sh_type != SHT_PROGBITS) ++ return -LIBBPF_ERRNO__FORMAT; + btf_data = data; + } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { ++ if (sh->sh_type != SHT_PROGBITS) ++ return -LIBBPF_ERRNO__FORMAT; + btf_ext_data = data; + } else if (sh->sh_type == SHT_SYMTAB) { + /* already processed during the first pass above */ diff --git a/patches.suse/loop-Check-for-overflow-while-configuring-loop.patch b/patches.suse/loop-Check-for-overflow-while-configuring-loop.patch index ffa584b..099340e 100644 --- a/patches.suse/loop-Check-for-overflow-while-configuring-loop.patch +++ b/patches.suse/loop-Check-for-overflow-while-configuring-loop.patch @@ -4,7 +4,7 @@ Date: Tue, 23 Aug 2022 21:38:10 +0530 Subject: [PATCH] loop: Check for overflow while configuring loop Git-commit: c490a0b5a4f36da3918181a8acdc6991d967c5f3 Patch-mainline: v6.0-rc3 -References: git-fies +References: git-fixes The userspace can configure a loop using an ioctl call, wherein a configuration of type loop_config is passed (see lo_ioctl()'s @@ -55,5 +55,5 @@ Acked-by: Takashi Iwai + return -EOVERFLOW; + memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); - memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); lo->lo_file_name[LO_NAME_SIZE-1] = 0; + lo->lo_flags = info->lo_flags; diff --git a/patches.suse/loop-Remove-the-unnecessary-bdev-checks-and-unused-b.patch 
b/patches.suse/loop-Remove-the-unnecessary-bdev-checks-and-unused-b.patch new file mode 100644 index 0000000..278e7c8 --- /dev/null +++ b/patches.suse/loop-Remove-the-unnecessary-bdev-checks-and-unused-b.patch @@ -0,0 +1,57 @@ +From: Xie Yongji +Date: Wed, 22 Sep 2021 20:37:10 +0800 +Subject: [PATCH] loop: Remove the unnecessary bdev checks and unused bdev + variable +Git-commit: 19f553db2ac03cb8407ec8efb8e140951afdfb87 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +The lo->lo_device can't be null if the lo->lo_backing_file is set. +So let's remove the unnecessary bdev checks and the entire bdev +variable in __loop_clr_fd() since the lo->lo_backing_file is already +checked before. + +Signed-off-by: Xie Yongji +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20210922123711.187-4-xieyongji@bytedance.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/loop.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/drivers/block/loop.c b/drivers/block/loop.c +index eab6906326cc..980b538c008a 100644 +--- a/drivers/block/loop.c ++++ b/drivers/block/loop.c +@@ -1329,7 +1329,6 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) + { + struct file *filp = NULL; + gfp_t gfp = lo->old_gfp_mask; +- struct block_device *bdev = lo->lo_device; + int err = 0; + bool partscan = false; + int lo_number; +@@ -1397,16 +1396,14 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) + blk_queue_io_min(lo->lo_queue, 512); + invalidate_disk(lo->lo_disk); + loop_sysfs_exit(lo); +- if (bdev) { +- /* let user-space know about this change */ +- kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); +- } ++ /* let user-space know about this change */ ++ kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); + mapping_set_gfp_mask(filp->f_mapping, gfp); + /* This is safe: open() is still holding a reference. */ + module_put(THIS_MODULE); + blk_mq_unfreeze_queue(lo->lo_queue); + +- partscan = lo->lo_flags & LO_FLAGS_PARTSCAN && bdev; ++ partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; + lo_number = lo->lo_number; + disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE); + out_unlock: +-- +2.35.3 + diff --git a/patches.suse/loop-Use-invalidate_disk-helper-to-invalidate-gendis.patch b/patches.suse/loop-Use-invalidate_disk-helper-to-invalidate-gendis.patch new file mode 100644 index 0000000..f2abb04 --- /dev/null +++ b/patches.suse/loop-Use-invalidate_disk-helper-to-invalidate-gendis.patch @@ -0,0 +1,39 @@ +From: Xie Yongji +Date: Wed, 22 Sep 2021 20:37:09 +0800 +Subject: [PATCH] loop: Use invalidate_disk() helper to invalidate gendisk +Git-commit: e515be8f3b3e63be4c5e91dc6620483ed0990a0c +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use invalidate_disk() helper to simplify the code for gendisk +invalidation. 
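+
+For reference, the block-layer helper this switches to reads roughly
+as follows (a sketch of the v5.16 helper, not the verbatim kernel
+source):
+
+	void invalidate_disk(struct gendisk *disk)
+	{
+		struct block_device *bdev = disk->part0;
+
+		invalidate_bdev(bdev);
+		bdev->bd_inode->i_mapping->wb_err = 0;
+		set_capacity(disk, 0);
+	}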
+ +Signed-off-by: Xie Yongji +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20210922123711.187-3-xieyongji@bytedance.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/loop.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/drivers/block/loop.c b/drivers/block/loop.c +index 7bf4686af774..eab6906326cc 100644 +--- a/drivers/block/loop.c ++++ b/drivers/block/loop.c +@@ -1395,11 +1395,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) + blk_queue_logical_block_size(lo->lo_queue, 512); + blk_queue_physical_block_size(lo->lo_queue, 512); + blk_queue_io_min(lo->lo_queue, 512); +- if (bdev) { +- invalidate_bdev(bdev); +- bdev->bd_inode->i_mapping->wb_err = 0; +- } +- set_capacity(lo->lo_disk, 0); ++ invalidate_disk(lo->lo_disk); + loop_sysfs_exit(lo); + if (bdev) { + /* let user-space know about this change */ +-- +2.35.3 + diff --git a/patches.suse/loop-add-error-handling-support-for-add_disk.patch b/patches.suse/loop-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..d459257 --- /dev/null +++ b/patches.suse/loop-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,48 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 14:59:57 -0700 +Subject: [PATCH] loop: add error handling support for add_disk() +Git-commit: 905705f083a936e49d5259e0bb539c53d5e0a9be +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. + +Reviewed-by: Christoph Hellwig +Reviewed-by: Hannes Reinecke +Signed-off-by: Luis Chamberlain +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/loop.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/drivers/block/loop.c b/drivers/block/loop.c +index 7bf4686af774..00ee365ed5e0 100644 +--- a/drivers/block/loop.c ++++ b/drivers/block/loop.c +@@ -2394,13 +2394,19 @@ static int loop_add(int i) + disk->event_flags = DISK_EVENT_FLAG_UEVENT; + sprintf(disk->disk_name, "loop%d", i); + /* Make this loop device reachable from pathname. */ +- add_disk(disk); ++ err = add_disk(disk); ++ if (err) ++ goto out_cleanup_disk; ++ + /* Show this loop device. */ + mutex_lock(&loop_ctl_mutex); + lo->idr_visible = true; + mutex_unlock(&loop_ctl_mutex); ++ + return i; + ++out_cleanup_disk: ++ blk_cleanup_disk(disk); + out_cleanup_tags: + blk_mq_free_tag_set(&lo->tag_set); + out_free_idr: +-- +2.35.3 + diff --git a/patches.suse/m68k-emu-nfblock-add-error-handling-support-for-add_.patch b/patches.suse/m68k-emu-nfblock-add-error-handling-support-for-add_.patch new file mode 100644 index 0000000..7c89c9e --- /dev/null +++ b/patches.suse/m68k-emu-nfblock-add-error-handling-support-for-add_.patch @@ -0,0 +1,59 @@ +From: Luis Chamberlain +Date: Fri, 15 Oct 2021 16:30:25 -0700 +Subject: [PATCH] m68k/emu/nfblock: add error handling support for add_disk() +Git-commit: 21fd880d3da7564bab68979417cab7408e4f9642 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. 
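+
+This is the same conversion pattern as in the loop driver above
+(sketch):
+
+	err = add_disk(disk);
+	if (err)
+		goto out_cleanup_disk;
+	return 0;
+
+out_cleanup_disk:
+	blk_cleanup_disk(disk);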
+ +Reviewed-by: Geert Uytterhoeven +Acked-by: Geert Uytterhoeven +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20211015233028.2167651-7-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + arch/m68k/emu/nfblock.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c +index 4ef457ba5220..9c57b245dc12 100644 +--- a/arch/m68k/emu/nfblock.c ++++ b/arch/m68k/emu/nfblock.c +@@ -99,6 +99,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize) + { + struct nfhd_device *dev; + int dev_id = id - NFHD_DEV_OFFSET; ++ int err = -ENOMEM; + + pr_info("nfhd%u: found device with %u blocks (%u bytes)\n", dev_id, + blocks, bsize); +@@ -129,16 +130,20 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize) + sprintf(dev->disk->disk_name, "nfhd%u", dev_id); + set_capacity(dev->disk, (sector_t)blocks * (bsize / 512)); + blk_queue_logical_block_size(dev->disk->queue, bsize); +- add_disk(dev->disk); ++ err = add_disk(dev->disk); ++ if (err) ++ goto out_cleanup_disk; + + list_add_tail(&dev->list, &nfhd_list); + + return 0; + ++out_cleanup_disk: ++ blk_cleanup_disk(dev->disk); + free_dev: + kfree(dev); + out: +- return -ENOMEM; ++ return err; + } + + static int __init nfhd_init(void) +-- +2.35.3 + diff --git a/patches.suse/mac80211-always-allocate-struct-ieee802_11_elems.patch b/patches.suse/mac80211-always-allocate-struct-ieee802_11_elems.patch index 483daa3..ecdcdf2 100644 --- a/patches.suse/mac80211-always-allocate-struct-ieee802_11_elems.patch +++ b/patches.suse/mac80211-always-allocate-struct-ieee802_11_elems.patch @@ -3,8 +3,8 @@ From: Johannes Berg Date: Mon, 20 Sep 2021 15:40:10 +0200 Subject: [PATCH] mac80211: always allocate struct ieee802_11_elems Git-commit: 5d24828d05f37ad770599de00b53d5386e35aa61 -Patch-mainline: v5.16-rc1 References: CVE-2022-42719 bsc#1204051 +Patch-mainline: v5.16-rc1 As the 802.11 spec evolves, we need to parse more and more elements. This is causing the struct to grow, and we can no @@ -32,23 +32,25 @@ other functions, sometimes with multiple levels. 
Link: https://lore.kernel.org/r/20210920154009.26caff6b5998.I05ae58768e990e611aee8eca8abefd9d7bc15e05@changeid Signed-off-by: Johannes Berg Acked-by: Takashi Iwai - +Signed-off-by: Oliver Neukum --- - net/mac80211/agg-rx.c | 11 +- - net/mac80211/ibss.c | 27 ++++-- - net/mac80211/ieee80211_i.h | 22 ++--- - net/mac80211/mesh.c | 86 ++++++++++++--------- - net/mac80211/mesh_hwmp.c | 44 ++++++----- - net/mac80211/mesh_plink.c | 11 +- - net/mac80211/mlme.c | 176 +++++++++++++++++++++++++-------------------- - net/mac80211/scan.c | 16 ++-- - net/mac80211/tdls.c | 63 +++++++++------- - net/mac80211/util.c | 20 +++-- - 10 files changed, 274 insertions(+), 202 deletions(-) + net/mac80211/agg-rx.c | 11 +-- + net/mac80211/ibss.c | 25 +++--- + net/mac80211/ieee80211_i.h | 22 ++--- + net/mac80211/mesh.c | 85 ++++++++++-------- + net/mac80211/mesh_hwmp.c | 44 +++++----- + net/mac80211/mesh_plink.c | 11 +-- + net/mac80211/mlme.c | 176 +++++++++++++++++++++---------------- + net/mac80211/scan.c | 16 ++-- + net/mac80211/tdls.c | 63 +++++++------ + net/mac80211/util.c | 20 +++-- + 10 files changed, 272 insertions(+), 201 deletions(-) +diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c +index cce28e3b2232..94c65def102c 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c -@@ -478,7 +478,7 @@ void ieee80211_process_addba_request(str +@@ -477,7 +477,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, size_t len) { u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num; @@ -57,7 +59,7 @@ Acked-by: Takashi Iwai u8 dialog_token; int ies_len; -@@ -496,16 +496,17 @@ void ieee80211_process_addba_request(str +@@ -495,16 +495,17 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, ies_len = len - offsetof(struct ieee80211_mgmt, u.action.u.addba_req.variable); if (ies_len) { @@ -79,6 +81,8 @@ Acked-by: Takashi Iwai } void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, +diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c +index 5d6ca4c3e698..66b00046f0c2 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -9,7 +9,7 @@ @@ -90,7 +94,7 @@ Acked-by: Takashi Iwai */ #include -@@ -1600,7 +1600,7 @@ void ieee80211_rx_mgmt_probe_beacon(stru +@@ -1589,7 +1589,7 @@ void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status) { size_t baselen; @@ -99,18 +103,17 @@ Acked-by: Takashi Iwai BUILD_BUG_ON(offsetof(typeof(mgmt->u.probe_resp), variable) != offsetof(typeof(mgmt->u.beacon), variable)); -@@ -1613,10 +1613,14 @@ void ieee80211_rx_mgmt_probe_beacon(stru +@@ -1602,10 +1602,14 @@ void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata, if (baselen > len) return; - ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, - false, &elems, mgmt->bssid, NULL); -- -- ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); + elems = ieee802_11_parse_elems(mgmt->u.probe_resp.variable, + len - baselen, false, + mgmt->bssid, NULL); -+ + +- ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); + if (elems) { + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, elems); + kfree(elems); @@ -118,7 +121,7 @@ Acked-by: Takashi Iwai } void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, -@@ -1625,7 +1629,7 @@ void ieee80211_ibss_rx_queued_mgmt(struc +@@ -1614,7 +1618,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; u16 fc; @@ -127,7 +130,7 @@ Acked-by: Takashi Iwai 
int ies_len; rx_status = IEEE80211_SKB_RXCB(skb); -@@ -1662,15 +1666,16 @@ void ieee80211_ibss_rx_queued_mgmt(struc +@@ -1651,15 +1655,16 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, if (ies_len < 0) break; @@ -148,9 +151,11 @@ Acked-by: Takashi Iwai break; } } +diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h +index 7a9e529f8366..c3b8590a7e90 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h -@@ -2197,18 +2197,18 @@ static inline void ieee80211_tx_skb(stru +@@ -2192,18 +2192,18 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb_tid(sdata, skb, 7); } @@ -180,9 +185,11 @@ Acked-by: Takashi Iwai } +diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c +index 65e9335b3614..a4212a333d61 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c -@@ -1247,7 +1247,7 @@ ieee80211_mesh_rx_probe_req(struct ieee8 +@@ -1246,7 +1246,7 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata, struct sk_buff *presp; struct beacon_data *bcn; struct ieee80211_mgmt *hdr; @@ -191,7 +198,7 @@ Acked-by: Takashi Iwai size_t baselen; u8 *pos; -@@ -1256,22 +1256,24 @@ ieee80211_mesh_rx_probe_req(struct ieee8 +@@ -1255,22 +1255,24 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata, if (baselen > len) return; @@ -226,7 +233,7 @@ Acked-by: Takashi Iwai rcu_read_lock(); bcn = rcu_dereference(ifmsh->beacon); -@@ -1295,6 +1297,8 @@ ieee80211_mesh_rx_probe_req(struct ieee8 +@@ -1294,6 +1296,8 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, presp); out: rcu_read_unlock(); @@ -235,7 +242,7 @@ Acked-by: Takashi Iwai } static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, -@@ -1305,7 +1309,7 @@ static void ieee80211_mesh_rx_bcn_presp( +@@ -1304,7 +1308,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; @@ -244,7 +251,7 @@ Acked-by: Takashi Iwai struct ieee80211_channel *channel; size_t baselen; int freq; -@@ -1320,42 +1324,48 @@ static void ieee80211_mesh_rx_bcn_presp( +@@ -1319,42 +1323,47 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (baselen > len) return; @@ -297,16 +304,15 @@ Acked-by: Takashi Iwai } if (ifmsh->sync_ops) - ifmsh->sync_ops->rx_bcn_presp(sdata, -- stype, mgmt, &elems, rx_status); -+ stype, mgmt, elems, rx_status); -+ + ifmsh->sync_ops->rx_bcn_presp(sdata, stype, mgmt, len, +- elems.mesh_config, rx_status); ++ elems->mesh_config, rx_status); +free: + kfree(elems); } int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata) -@@ -1447,7 +1457,7 @@ static void mesh_rx_csa_frame(struct iee +@@ -1446,7 +1455,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; @@ -315,7 +321,7 @@ Acked-by: Takashi Iwai u16 pre_value; bool fwd_csa = true; size_t baselen; -@@ -1460,33 +1470,37 @@ static void mesh_rx_csa_frame(struct iee +@@ -1459,33 +1468,37 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, pos = mgmt->u.action.u.chan_switch.variable; baselen = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch.variable); @@ -363,6 +369,8 @@ Acked-by: Takashi Iwai } static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, +diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c +index 
a05b615deb51..44a6fdb6efbd 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1,7 +1,7 @@ @@ -374,7 +382,7 @@ Acked-by: Takashi Iwai * Author: Luis Carlos Cobo */ -@@ -908,7 +908,7 @@ static void hwmp_rann_frame_process(stru +@@ -908,7 +908,7 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { @@ -383,7 +391,7 @@ Acked-by: Takashi Iwai size_t baselen; u32 path_metric; struct sta_info *sta; -@@ -926,37 +926,41 @@ void mesh_rx_path_sel_frame(struct ieee8 +@@ -926,37 +926,41 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt; @@ -443,6 +451,8 @@ Acked-by: Takashi Iwai } /** +diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c +index a6915847d78a..a829470dd59e 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1,7 +1,7 @@ @@ -454,7 +464,7 @@ Acked-by: Takashi Iwai * Author: Luis Carlos Cobo */ #include -@@ -1200,7 +1200,7 @@ void mesh_rx_plink_frame(struct ieee8021 +@@ -1200,7 +1200,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { @@ -463,7 +473,7 @@ Acked-by: Takashi Iwai size_t baselen; u8 *baseaddr; -@@ -1228,7 +1228,8 @@ void mesh_rx_plink_frame(struct ieee8021 +@@ -1228,7 +1228,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, if (baselen > len) return; } @@ -475,9 +485,11 @@ Acked-by: Takashi Iwai + mesh_process_plink_frame(sdata, mgmt, elems, rx_status); + kfree(elems); } +diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c +index e18bd07f6822..e80f3388b0c5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c -@@ -3312,8 +3312,11 @@ static bool ieee80211_assoc_success(stru +@@ -3291,8 +3291,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, aid = 0; /* TODO */ } capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); @@ -491,7 +503,7 @@ Acked-by: Takashi Iwai if (elems->aid_resp) aid = le16_to_cpu(elems->aid_resp->aid); -@@ -3335,7 +3338,8 @@ static bool ieee80211_assoc_success(stru +@@ -3314,7 +3317,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (!is_s1g && !elems->supp_rates) { sdata_info(sdata, "no SuppRates element in AssocResp\n"); @@ -501,7 +513,7 @@ Acked-by: Takashi Iwai } sdata->vif.bss_conf.aid = aid; -@@ -3357,7 +3361,7 @@ static bool ieee80211_assoc_success(stru +@@ -3336,7 +3340,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && (!elems->vht_cap_elem || !elems->vht_operation)))) { const struct cfg80211_bss_ies *ies; @@ -510,7 +522,7 @@ Acked-by: Takashi Iwai rcu_read_lock(); ies = rcu_dereference(cbss->ies); -@@ -3368,13 +3372,17 @@ static bool ieee80211_assoc_success(stru +@@ -3347,13 +3351,17 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (!bss_ies) return false; @@ -534,7 +546,7 @@ Acked-by: Takashi Iwai sdata_info(sdata, "AP bug: WMM param missing from AssocResp\n"); } -@@ -3383,30 +3391,32 @@ static bool ieee80211_assoc_success(stru +@@ -3362,30 +3370,32 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, * Also check if we requested HT/VHT, otherwise the AP doesn't * have to include the IEs in the (re)association response. 
*/ @@ -575,7 +587,7 @@ Acked-by: Takashi Iwai } /* -@@ -3660,6 +3670,7 @@ static bool ieee80211_assoc_success(stru +@@ -3630,6 +3640,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, ret = true; out: @@ -583,7 +595,7 @@ Acked-by: Takashi Iwai kfree(bss_ies); return ret; } -@@ -3671,7 +3682,7 @@ static void ieee80211_rx_mgmt_assoc_resp +@@ -3641,7 +3652,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data; u16 capab_info, status_code, aid; @@ -592,7 +604,7 @@ Acked-by: Takashi Iwai int ac, uapsd_queues = -1; u8 *pos; bool reassoc; -@@ -3728,14 +3739,16 @@ static void ieee80211_rx_mgmt_assoc_resp +@@ -3698,14 +3709,16 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, fils_decrypt_assoc_resp(sdata, (u8 *)mgmt, &len, assoc_data) < 0) return; @@ -614,7 +626,7 @@ Acked-by: Takashi Iwai ms = tu * 1024 / 1000; sdata_info(sdata, "%pM rejected association temporarily; comeback duration %u TU (%u ms)\n", -@@ -3755,7 +3768,7 @@ static void ieee80211_rx_mgmt_assoc_resp +@@ -3725,7 +3738,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, event.u.mlme.reason = status_code; drv_event_callback(sdata->local, sdata, &event); } else { @@ -623,7 +635,7 @@ Acked-by: Takashi Iwai /* oops -- internal error -- send timeout for now */ ieee80211_destroy_assoc_data(sdata, false, false); cfg80211_assoc_timeout(sdata->dev, cbss); -@@ -3785,6 +3798,7 @@ static void ieee80211_rx_mgmt_assoc_resp +@@ -3755,6 +3768,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ifmgd->assoc_req_ies, ifmgd->assoc_req_ies_len); notify_driver: drv_mgd_complete_tx(sdata->local, sdata, &info); @@ -631,7 +643,7 @@ Acked-by: Takashi Iwai } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, -@@ -3989,7 +4003,7 @@ static void ieee80211_rx_mgmt_beacon(str +@@ -3959,7 +3973,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; struct ieee80211_mgmt *mgmt = (void *) hdr; size_t baselen; @@ -640,7 +652,7 @@ Acked-by: Takashi Iwai struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *chan; -@@ -4035,15 +4049,16 @@ static void ieee80211_rx_mgmt_beacon(str +@@ -4005,15 +4019,16 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon && ieee80211_rx_our_beacon(bssid, ifmgd->assoc_data->bss)) { @@ -663,7 +675,7 @@ Acked-by: Takashi Iwai ifmgd->have_beacon = true; ifmgd->assoc_data->need_beacon = false; if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) { -@@ -4051,17 +4066,17 @@ static void ieee80211_rx_mgmt_beacon(str +@@ -4021,17 +4036,17 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, le64_to_cpu(mgmt->u.beacon.timestamp); sdata->vif.bss_conf.sync_device_ts = rx_status->device_timestamp; @@ -686,7 +698,7 @@ Acked-by: Takashi Iwai bss_conf->ema_ap = true; else bss_conf->ema_ap = false; -@@ -4070,6 +4085,7 @@ static void ieee80211_rx_mgmt_beacon(str +@@ -4040,6 +4055,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ifmgd->assoc_data->timeout = jiffies; ifmgd->assoc_data->timeout_started = true; run_again(sdata, ifmgd->assoc_data->timeout); @@ -694,7 +706,7 @@ Acked-by: Takashi Iwai 
return; } -@@ -4101,14 +4117,15 @@ static void ieee80211_rx_mgmt_beacon(str +@@ -4071,14 +4087,15 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, */ if (!ieee80211_is_s1g_beacon(hdr->frame_control)) ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4); @@ -716,7 +728,7 @@ Acked-by: Takashi Iwai if (local->hw.conf.dynamic_ps_timeout > 0) { if (local->hw.conf.flags & IEEE80211_CONF_PS) { local->hw.conf.flags &= ~IEEE80211_CONF_PS; -@@ -4178,12 +4195,12 @@ static void ieee80211_rx_mgmt_beacon(str +@@ -4148,12 +4165,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, le64_to_cpu(mgmt->u.beacon.timestamp); sdata->vif.bss_conf.sync_device_ts = rx_status->device_timestamp; @@ -731,7 +743,7 @@ Acked-by: Takashi Iwai ifmgd->beacon_crc = ncrc; ifmgd->beacon_crc_valid = true; -@@ -4191,12 +4208,12 @@ static void ieee80211_rx_mgmt_beacon(str +@@ -4161,12 +4178,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, rx_status->device_timestamp, @@ -748,7 +760,7 @@ Acked-by: Takashi Iwai changed |= BSS_CHANGED_QOS; /* -@@ -4205,7 +4222,7 @@ static void ieee80211_rx_mgmt_beacon(str +@@ -4175,7 +4192,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, */ if (!ifmgd->have_beacon) { /* a few bogus AP send dtim_period = 0 or no TIM IE */ @@ -757,7 +769,7 @@ Acked-by: Takashi Iwai changed |= BSS_CHANGED_BEACON_INFO; ifmgd->have_beacon = true; -@@ -4217,9 +4234,9 @@ static void ieee80211_rx_mgmt_beacon(str +@@ -4187,9 +4204,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_ps_vif(sdata); } @@ -769,7 +781,7 @@ Acked-by: Takashi Iwai } else { erp_valid = false; } -@@ -4232,12 +4249,12 @@ static void ieee80211_rx_mgmt_beacon(str +@@ -4202,12 +4219,12 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, bssid); @@ -787,7 +799,7 @@ Acked-by: Takashi Iwai mutex_unlock(&local->sta_mtx); sdata_info(sdata, "failed to follow AP %pM bandwidth change, disconnect\n", -@@ -4249,21 +4266,23 @@ static void ieee80211_rx_mgmt_beacon(str +@@ -4219,21 +4236,23 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, sizeof(deauth_buf), true, WLAN_REASON_DEAUTH_LEAVING, false); @@ -818,7 +830,7 @@ Acked-by: Takashi Iwai } void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata, -@@ -4292,7 +4311,6 @@ void ieee80211_sta_rx_queued_mgmt(struct +@@ -4262,7 +4281,6 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; u16 fc; @@ -826,7 +838,7 @@ Acked-by: Takashi Iwai int ies_len; rx_status = (struct ieee80211_rx_status *) skb->cb; -@@ -4324,6 +4342,8 @@ void ieee80211_sta_rx_queued_mgmt(struct +@@ -4294,6 +4312,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; case IEEE80211_STYPE_ACTION: if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) { @@ -835,7 +847,7 @@ Acked-by: Takashi Iwai ies_len = skb->len - offsetof(struct ieee80211_mgmt, u.action.u.chan_switch.variable); -@@ -4332,18 +4352,21 @@ void ieee80211_sta_rx_queued_mgmt(struct +@@ -4302,18 +4322,21 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; /* CSA IE cannot be overridden, no need for BSSID */ @@ -862,7 +874,7 @@ Acked-by: Takashi Iwai ies_len = skb->len - offsetof(struct ieee80211_mgmt, 
u.action.u.ext_chan_switch.variable); -@@ -4355,21 +4378,22 @@ void ieee80211_sta_rx_queued_mgmt(struct +@@ -4325,21 +4348,22 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, * extended CSA IE can't be overridden, no need for * BSSID */ @@ -891,6 +903,8 @@ Acked-by: Takashi Iwai } break; } +diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c +index 6b50cb5e0e3c..5e6b275afc9e 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -9,7 +9,7 @@ @@ -902,7 +916,7 @@ Acked-by: Takashi Iwai */ #include -@@ -155,7 +155,7 @@ ieee80211_bss_info_update(struct ieee802 +@@ -155,7 +155,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, }; bool signal_valid; struct ieee80211_sub_if_data *scan_sdata; @@ -911,7 +925,7 @@ Acked-by: Takashi Iwai size_t baselen; u8 *elements; -@@ -209,8 +209,10 @@ ieee80211_bss_info_update(struct ieee802 +@@ -209,8 +209,10 @@ ieee80211_bss_info_update(struct ieee80211_local *local, if (baselen > len) return NULL; @@ -924,7 +938,7 @@ Acked-by: Takashi Iwai /* In case the signal is invalid update the status */ signal_valid = channel == cbss->channel; -@@ -218,15 +220,17 @@ ieee80211_bss_info_update(struct ieee802 +@@ -218,15 +220,17 @@ ieee80211_bss_info_update(struct ieee80211_local *local, rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL; bss = (void *)cbss->priv; @@ -944,6 +958,8 @@ Acked-by: Takashi Iwai return bss; } +diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c +index 45e532ad1215..137be9ec94af 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -6,7 +6,7 @@ @@ -955,7 +971,7 @@ Acked-by: Takashi Iwai */ #include -@@ -1684,7 +1684,7 @@ ieee80211_process_tdls_channel_switch_re +@@ -1684,7 +1684,7 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; @@ -964,7 +980,7 @@ Acked-by: Takashi Iwai struct sta_info *sta; struct ieee80211_tdls_data *tf = (void *)skb->data; bool local_initiator; -@@ -1718,16 +1718,20 @@ ieee80211_process_tdls_channel_switch_re +@@ -1718,16 +1718,20 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, goto call_drv; } @@ -990,7 +1006,7 @@ Acked-by: Takashi Iwai tdls_dbg(sdata, "TDLS channel switch resp - missing IEs\n"); ret = -EINVAL; goto out; -@@ -1735,15 +1739,15 @@ ieee80211_process_tdls_channel_switch_re +@@ -1735,15 +1739,15 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, /* validate the initiator is set correctly */ local_initiator = @@ -1009,7 +1025,7 @@ Acked-by: Takashi Iwai params.tmpl_skb = ieee80211_tdls_ch_sw_resp_tmpl_get(sta, ¶ms.ch_sw_tm_ie); -@@ -1763,6 +1767,7 @@ call_drv: +@@ -1763,6 +1767,7 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, out: mutex_unlock(&local->sta_mtx); dev_kfree_skb_any(params.tmpl_skb); @@ -1017,7 +1033,7 @@ Acked-by: Takashi Iwai return ret; } -@@ -1771,7 +1776,7 @@ ieee80211_process_tdls_channel_switch_re +@@ -1771,7 +1776,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; @@ -1026,7 +1042,7 @@ Acked-by: Takashi Iwai struct cfg80211_chan_def chandef; struct ieee80211_channel *chan; enum nl80211_channel_type chan_type; -@@ -1831,22 +1836,27 @@ ieee80211_process_tdls_channel_switch_re +@@ -1831,22 +1836,27 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, return -EINVAL; } @@ -1062,7 +1078,7 @@ Acked-by: Takashi Iwai case 
IEEE80211_HT_PARAM_CHA_SEC_ABOVE: chan_type = NL80211_CHAN_HT40PLUS; break; -@@ -1865,7 +1875,8 @@ ieee80211_process_tdls_channel_switch_re +@@ -1865,7 +1875,8 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, if (!cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &chandef, sdata->wdev.iftype)) { tdls_dbg(sdata, "TDLS chan switch to forbidden channel\n"); @@ -1072,7 +1088,7 @@ Acked-by: Takashi Iwai } mutex_lock(&local->sta_mtx); -@@ -1881,7 +1892,7 @@ ieee80211_process_tdls_channel_switch_re +@@ -1881,7 +1892,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, /* validate the initiator is set correctly */ local_initiator = @@ -1081,7 +1097,7 @@ Acked-by: Takashi Iwai if (local_initiator == sta->sta.tdls_initiator) { tdls_dbg(sdata, "TDLS chan switch invalid lnk-id initiator\n"); ret = -EINVAL; -@@ -1889,16 +1900,16 @@ ieee80211_process_tdls_channel_switch_re +@@ -1889,16 +1900,16 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, } /* peer should have known better */ @@ -1102,7 +1118,7 @@ Acked-by: Takashi Iwai params.tmpl_skb = ieee80211_tdls_ch_sw_resp_tmpl_get(sta, -@@ -1917,6 +1928,8 @@ ieee80211_process_tdls_channel_switch_re +@@ -1917,6 +1928,8 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, out: mutex_unlock(&local->sta_mtx); dev_kfree_skb_any(params.tmpl_skb); @@ -1111,9 +1127,11 @@ Acked-by: Takashi Iwai return ret; } +diff --git a/net/mac80211/util.c b/net/mac80211/util.c +index dce841228297..ca8008ba9b1f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c -@@ -1384,8 +1384,8 @@ _ieee802_11_parse_elems_crc(const u8 *st +@@ -1391,8 +1391,8 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, struct ieee802_11_elems *elems, @@ -1124,7 +1142,7 @@ Acked-by: Takashi Iwai u8 *nontransmitted_profile) { const struct element *elem, *sub; -@@ -1452,16 +1452,20 @@ static size_t ieee802_11_find_bssid_prof +@@ -1457,16 +1457,20 @@ static size_t ieee802_11_find_bssid_profile(const u8 *start, size_t len, return found ? profile_len : 0; } @@ -1150,7 +1168,7 @@ Acked-by: Takashi Iwai elems->ie_start = start; elems->total_len = len; -@@ -1508,6 +1512,8 @@ void ieee802_11_parse_elems_crc(const u8 +@@ -1513,6 +1517,8 @@ void ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, kfree(nontransmitted_profile); elems->crc = crc; @@ -1159,3 +1177,6 @@ Acked-by: Takashi Iwai } void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, +-- +2.35.3 + diff --git a/patches.suse/mac80211-limit-bandwidth-in-HE-capabilities.patch b/patches.suse/mac80211-limit-bandwidth-in-HE-capabilities.patch new file mode 100644 index 0000000..bb076d7 --- /dev/null +++ b/patches.suse/mac80211-limit-bandwidth-in-HE-capabilities.patch @@ -0,0 +1,154 @@ +From 1f2c104448477512fcf7296df54bfbc3a6f9a765 Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Wed, 2 Feb 2022 10:49:34 +0200 +Subject: [PATCH] mac80211: limit bandwidth in HE capabilities +Git-commit: 1f2c104448477512fcf7296df54bfbc3a6f9a765 +Patch-mainline: v5.18-rc1 +References: git-fixes + +If we're limiting bandwidth for some reason such as regulatory +restrictions, then advertise that limitation just like we do +for VHT today, so the AP is aware we cannot use the higher BW +it might be using. 
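As a hedged illustration (not part of the upstream commit text): the mechanism reduces to masking channel-width bits out of a stack copy of the HE capability element before it is serialized, so the advertised IE never claims widths the station may not use. The flag and bit names below are taken from the mac80211 hunks further down; everything else is a sketch.

/*
 * Sketch only: assumes mac80211's struct ieee80211_he_cap_elem and the
 * IEEE80211_STA_DISABLE_* / IEEE80211_HE_PHY_CAP0_* definitions.
 */
static void he_cap_apply_width_limits(struct ieee80211_he_cap_elem *elem,
				      u32 disable_flags)
{
	if (disable_flags & IEEE80211_STA_DISABLE_40MHZ)
		elem->phy_cap_info[0] &=
			~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
			  IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G);

	if (disable_flags & IEEE80211_STA_DISABLE_160MHZ)
		elem->phy_cap_info[0] &=
			~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;

	if (disable_flags & IEEE80211_STA_DISABLE_80P80MHZ)
		elem->phy_cap_info[0] &=
			~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
}

Because clearing width bits can shrink the MCS/NSS part of the element, the builder recomputes the element length from the masked copy and the caller trims any over-allocation with skb_trim(), as the mlme.c hunk below shows.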
+ +Fixes: 41cbb0f5a295 ("mac80211: add support for HE") +Signed-off-by: Johannes Berg +Signed-off-by: Luca Coelho +Link: https://lore.kernel.org/r/iwlwifi.20220202104617.70c8e3e7ee76.If317630de69ff1146bec7d47f5b83038695eb71d@changeid +Acked-by: Takashi Iwai + +--- + net/mac80211/ieee80211_i.h | 2 +- + net/mac80211/mesh.c | 2 +- + net/mac80211/mlme.c | 11 ++++++++--- + net/mac80211/util.c | 27 ++++++++++++++++++++++----- + 4 files changed, 32 insertions(+), 10 deletions(-) + +diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h +index 330ea62231fa..da35791b8378 100644 +--- a/net/mac80211/ieee80211_i.h ++++ b/net/mac80211/ieee80211_i.h +@@ -2380,7 +2380,7 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, + u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, + const struct cfg80211_chan_def *chandef); + u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype); +-u8 *ieee80211_ie_build_he_cap(u8 *pos, ++u8 *ieee80211_ie_build_he_cap(u32 disable_flags, u8 *pos, + const struct ieee80211_sta_he_cap *he_cap, + u8 *end); + void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata, +diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c +index 15ac08d111ea..6847fdf93439 100644 +--- a/net/mac80211/mesh.c ++++ b/net/mac80211/mesh.c +@@ -580,7 +580,7 @@ int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata, + return -ENOMEM; + + pos = skb_put(skb, ie_len); +- ieee80211_ie_build_he_cap(pos, he_cap, pos + ie_len); ++ ieee80211_ie_build_he_cap(0, pos, he_cap, pos + ie_len); + + return 0; + } +diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c +index 1eeabdf10052..55e21557a3d2 100644 +--- a/net/mac80211/mlme.c ++++ b/net/mac80211/mlme.c +@@ -635,7 +635,7 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, + struct ieee80211_supported_band *sband) + { +- u8 *pos; ++ u8 *pos, *pre_he_pos; + const struct ieee80211_sta_he_cap *he_cap = NULL; + struct ieee80211_chanctx_conf *chanctx_conf; + u8 he_cap_size; +@@ -652,16 +652,21 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, + + he_cap = ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); +- if (!he_cap || !reg_cap) ++ if (!he_cap || !chanctx_conf || !reg_cap) + return; + ++ /* get a max size estimate */ + he_cap_size = + 2 + 1 + sizeof(he_cap->he_cap_elem) + + ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) + + ieee80211_he_ppe_size(he_cap->ppe_thres[0], + he_cap->he_cap_elem.phy_cap_info); + pos = skb_put(skb, he_cap_size); +- ieee80211_ie_build_he_cap(pos, he_cap, pos + he_cap_size); ++ pre_he_pos = pos; ++ pos = ieee80211_ie_build_he_cap(sdata->u.mgd.flags, ++ pos, he_cap, pos + he_cap_size); ++ /* trim excess if any */ ++ skb_trim(skb, skb->len - (pre_he_pos + he_cap_size - pos)); + + ieee80211_ie_build_he_6ghz_cap(sdata, skb); + } +diff --git a/net/mac80211/util.c b/net/mac80211/util.c +index f71b042a5c8b..342c2bfe2709 100644 +--- a/net/mac80211/util.c ++++ b/net/mac80211/util.c +@@ -1974,7 +1974,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, + if (he_cap && + cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band), + IEEE80211_CHAN_NO_HE)) { +- pos = ieee80211_ie_build_he_cap(pos, he_cap, end); ++ pos = ieee80211_ie_build_he_cap(0, pos, he_cap, end); + if (!pos) + goto out_err; + } +@@ -2918,10 +2918,11 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype) + 
he_cap->he_cap_elem.phy_cap_info); + } + +-u8 *ieee80211_ie_build_he_cap(u8 *pos, ++u8 *ieee80211_ie_build_he_cap(u32 disable_flags, u8 *pos, + const struct ieee80211_sta_he_cap *he_cap, + u8 *end) + { ++ struct ieee80211_he_cap_elem elem; + u8 n; + u8 ie_len; + u8 *orig_pos = pos; +@@ -2934,7 +2935,23 @@ u8 *ieee80211_ie_build_he_cap(u8 *pos, + if (!he_cap) + return orig_pos; + +- n = ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem); ++ /* modify on stack first to calculate 'n' and 'ie_len' correctly */ ++ elem = he_cap->he_cap_elem; ++ ++ if (disable_flags & IEEE80211_STA_DISABLE_40MHZ) ++ elem.phy_cap_info[0] &= ++ ~(IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G | ++ IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G); ++ ++ if (disable_flags & IEEE80211_STA_DISABLE_160MHZ) ++ elem.phy_cap_info[0] &= ++ ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; ++ ++ if (disable_flags & IEEE80211_STA_DISABLE_80P80MHZ) ++ elem.phy_cap_info[0] &= ++ ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G; ++ ++ n = ieee80211_he_mcs_nss_size(&elem); + ie_len = 2 + 1 + + sizeof(he_cap->he_cap_elem) + n + + ieee80211_he_ppe_size(he_cap->ppe_thres[0], +@@ -2948,8 +2965,8 @@ u8 *ieee80211_ie_build_he_cap(u8 *pos, + *pos++ = WLAN_EID_EXT_HE_CAPABILITY; + + /* Fixed data */ +- memcpy(pos, &he_cap->he_cap_elem, sizeof(he_cap->he_cap_elem)); +- pos += sizeof(he_cap->he_cap_elem); ++ memcpy(pos, &elem, sizeof(elem)); ++ pos += sizeof(elem); + + memcpy(pos, &he_cap->he_mcs_nss_supp, n); + pos += n; +-- +2.35.3 + diff --git a/patches.suse/mac80211-mesh-clean-up-rx_bcn_presp-API.patch b/patches.suse/mac80211-mesh-clean-up-rx_bcn_presp-API.patch new file mode 100644 index 0000000..beaf9c2 --- /dev/null +++ b/patches.suse/mac80211-mesh-clean-up-rx_bcn_presp-API.patch @@ -0,0 +1,126 @@ +From a5b983c6073140b624f64e79fea6d33c3e4315a0 Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Mon, 20 Sep 2021 15:40:07 +0200 +Subject: [PATCH] mac80211: mesh: clean up rx_bcn_presp API +Git-commit: a5b983c6073140b624f64e79fea6d33c3e4315a0 +References: git-fixes +Patch-mainline: v5.16-rc1 + +We currently pass the entire elements to the rx_bcn_presp() +method, but only need mesh_config. Additionally, we use the +length of the elements to calculate back the entire frame's +length, but that's confusing - just pass the length of the +frame instead. 
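A note on the length change, since the equivalence is easy to miss: the old code reconstructed the frame length from the parsed elements, while the new code passes the received frame length directly. For a beacon the two are the same quantity:

/*
 * Sketch (not upstream code): why "len" can replace the old sum.
 * A beacon management frame is laid out as
 *   24 bytes  802.11 management header
 * + 12 bytes  fixed beacon fields (8 timestamp + 2 interval + 2 capability)
 * +  n bytes  information elements (elems->total_len)
 * so 24 + 12 + elems->total_len == len, and the helper call becomes:
 */
u64 t_r = ieee80211_calculate_rx_timestamp(local, rx_status,
					   len + FCS_LEN, 24);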
+ +Link: https://lore.kernel.org/r/20210920154009.a18ed3d2da6c.I1824b773a0fbae4453e1433c184678ca14e8df45@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Oliver Neukum +--- + net/mac80211/ieee80211_i.h | 7 +++---- + net/mac80211/mesh.c | 4 ++-- + net/mac80211/mesh_sync.c | 26 ++++++++++++-------------- + 3 files changed, 17 insertions(+), 20 deletions(-) + +diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h +index 159af6c3ffb0..d74031bb4ae6 100644 +--- a/net/mac80211/ieee80211_i.h ++++ b/net/mac80211/ieee80211_i.h +@@ -631,10 +631,9 @@ struct ieee80211_if_ocb { + */ + struct ieee802_11_elems; + struct ieee80211_mesh_sync_ops { +- void (*rx_bcn_presp)(struct ieee80211_sub_if_data *sdata, +- u16 stype, +- struct ieee80211_mgmt *mgmt, +- struct ieee802_11_elems *elems, ++ void (*rx_bcn_presp)(struct ieee80211_sub_if_data *sdata, u16 stype, ++ struct ieee80211_mgmt *mgmt, unsigned int len, ++ const struct ieee80211_meshconf_ie *mesh_cfg, + struct ieee80211_rx_status *rx_status); + + /* should be called with beacon_data under RCU read lock */ +diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c +index 97095b7c9c64..65e9335b3614 100644 +--- a/net/mac80211/mesh.c ++++ b/net/mac80211/mesh.c +@@ -1353,8 +1353,8 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, + } + + if (ifmsh->sync_ops) +- ifmsh->sync_ops->rx_bcn_presp(sdata, +- stype, mgmt, &elems, rx_status); ++ ifmsh->sync_ops->rx_bcn_presp(sdata, stype, mgmt, len, ++ elems.mesh_config, rx_status); + } + + int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata) +diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c +index fde93de2b80a..9e342cc2504c 100644 +--- a/net/mac80211/mesh_sync.c ++++ b/net/mac80211/mesh_sync.c +@@ -3,6 +3,7 @@ + * Copyright 2011-2012, Pavel Zubarev + * Copyright 2011-2012, Marco Porsch + * Copyright 2011-2012, cozybit Inc. 
++ * Copyright (C) 2021 Intel Corporation + */ + + #include "ieee80211_i.h" +@@ -35,12 +36,12 @@ struct sync_method { + /** + * mesh_peer_tbtt_adjusting - check if an mp is currently adjusting its TBTT + * +- * @ie: information elements of a management frame from the mesh peer ++ * @cfg: mesh config element from the mesh peer (or %NULL) + */ +-static bool mesh_peer_tbtt_adjusting(struct ieee802_11_elems *ie) ++static bool mesh_peer_tbtt_adjusting(const struct ieee80211_meshconf_ie *cfg) + { +- return (ie->mesh_config->meshconf_cap & +- IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING) != 0; ++ return cfg && ++ (cfg->meshconf_cap & IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING); + } + + void mesh_sync_adjust_tsf(struct ieee80211_sub_if_data *sdata) +@@ -76,11 +77,11 @@ void mesh_sync_adjust_tsf(struct ieee80211_sub_if_data *sdata) + } + } + +-static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, +- u16 stype, +- struct ieee80211_mgmt *mgmt, +- struct ieee802_11_elems *elems, +- struct ieee80211_rx_status *rx_status) ++static void ++mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, u16 stype, ++ struct ieee80211_mgmt *mgmt, unsigned int len, ++ const struct ieee80211_meshconf_ie *mesh_cfg, ++ struct ieee80211_rx_status *rx_status) + { + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct ieee80211_local *local = sdata->local; +@@ -101,10 +102,7 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, + */ + if (ieee80211_have_rx_timestamp(rx_status)) + t_r = ieee80211_calculate_rx_timestamp(local, rx_status, +- 24 + 12 + +- elems->total_len + +- FCS_LEN, +- 24); ++ len + FCS_LEN, 24); + else + t_r = drv_get_tsf(local, sdata); + +@@ -119,7 +117,7 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, + * dot11MeshNbrOffsetMaxNeighbor non-peer non-MBSS neighbors + */ + +- if (elems->mesh_config && mesh_peer_tbtt_adjusting(elems)) { ++ if (mesh_peer_tbtt_adjusting(mesh_cfg)) { + msync_dbg(sdata, "STA %pM : is adjusting TBTT\n", + sta->sta.addr); + goto no_sync; +-- +2.35.3 + diff --git a/patches.suse/md-add-error-handling-support-for-add_disk.patch b/patches.suse/md-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..625938d --- /dev/null +++ b/patches.suse/md-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,43 @@ +From: Luis Chamberlain +Date: Wed, 1 Sep 2021 13:38:30 +0200 +Subject: [PATCH] md: add error handling support for add_disk() +Git-commit: 9be68dd7ac0e13be2ac57770c1f921d6b3294c6e +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. + +We just do the unwinding of what was not done before, and are +sure to unlock prior to bailing. 
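The caller-side contract, shown as a minimal sketch (it mirrors the md.c hunk below rather than adding anything beyond it): now that add_disk() reports errors, the caller must release the gendisk itself on failure before taking the normal error exit.

/* Sketch: check add_disk() and undo blk_alloc_disk() on failure. */
error = add_disk(disk);
if (error) {
	blk_cleanup_disk(disk);	/* nothing else is registered yet */
	goto abort;		/* md_alloc()'s existing unlock path */
}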
+ +Signed-off-by: Luis Chamberlain +Signed-off-by: Christoph Hellwig +Signed-off-by: Song Liu +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/md/md.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/md/md.c b/drivers/md/md.c +index 22310d5d8d41..eff3d23e1fcd 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -5700,7 +5700,11 @@ static int md_alloc(dev_t dev, char *name) + disk->flags |= GENHD_FL_EXT_DEVT; + disk->events |= DISK_EVENT_MEDIA_CHANGE; + mddev->gendisk = disk; +- add_disk(disk); ++ error = add_disk(disk); ++ if (error) { ++ blk_cleanup_disk(disk); ++ goto abort; ++ } + + error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md"); + if (error) { +-- +2.35.3 + diff --git a/patches.suse/md-add-the-bitmap-group-to-the-default-groups-for-th.patch b/patches.suse/md-add-the-bitmap-group-to-the-default-groups-for-th.patch new file mode 100644 index 0000000..d923685 --- /dev/null +++ b/patches.suse/md-add-the-bitmap-group-to-the-default-groups-for-th.patch @@ -0,0 +1,78 @@ +From: Christoph Hellwig +Date: Wed, 1 Sep 2021 13:38:31 +0200 +Subject: [PATCH] md: add the bitmap group to the default groups for the md + kobject +Git-commit: 51238e7fbd6182e36dbc093c92ae93142c57c0f5 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Replace the deprecated default_attrs with the default_groups mechanism, +and add the always visible bitmap group to the groups created at +kobject_add time. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Song Liu +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/md/md.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +diff --git a/drivers/md/md.c b/drivers/md/md.c +index eff3d23e1fcd..e04a23dc46bf 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -5490,6 +5490,10 @@ static struct attribute *md_default_attrs[] = { + NULL, + }; + ++static const struct attribute_group md_default_group = { ++ .attrs = md_default_attrs, ++}; ++ + static struct attribute *md_redundancy_attrs[] = { + &md_scan_mode.attr, + &md_last_scan_mode.attr, +@@ -5512,6 +5516,12 @@ static const struct attribute_group md_redundancy_group = { + .attrs = md_redundancy_attrs, + }; + ++static const struct attribute_group *md_attr_groups[] = { ++ &md_default_group, ++ &md_bitmap_group, ++ NULL, ++}; ++ + static ssize_t + md_attr_show(struct kobject *kobj, struct attribute *attr, char *page) + { +@@ -5587,7 +5597,7 @@ static const struct sysfs_ops md_sysfs_ops = { + static struct kobj_type md_ktype = { + .release = md_free, + .sysfs_ops = &md_sysfs_ops, +- .default_attrs = md_default_attrs, ++ .default_groups = md_attr_groups, + }; + + int mdp_major = 0; +@@ -5596,7 +5606,6 @@ static void mddev_delayed_delete(struct work_struct *ws) + { + struct mddev *mddev = container_of(ws, struct mddev, del_work); + +- sysfs_remove_group(&mddev->kobj, &md_bitmap_group); + kobject_del(&mddev->kobj); + kobject_put(&mddev->kobj); + } +@@ -5715,9 +5724,6 @@ static int md_alloc(dev_t dev, char *name) + disk->disk_name); + error = 0; + } +- if (mddev->kobj.sd && +- sysfs_create_group(&mddev->kobj, &md_bitmap_group)) +- pr_debug("pointless warning\n"); + abort: + mutex_unlock(&disks_mutex); + if (!error && mddev->kobj.sd) { +-- +2.35.3 + diff --git a/patches.suse/md-extend-disks_mutex-coverage.patch b/patches.suse/md-extend-disks_mutex-coverage.patch new file mode 100644 index 0000000..cff04ce --- /dev/null +++ b/patches.suse/md-extend-disks_mutex-coverage.patch @@ -0,0 +1,40 @@ +From:
Christoph Hellwig +Date: Wed, 1 Sep 2021 13:38:32 +0200 +Subject: [PATCH] md: extend disks_mutex coverage +Git-commit: 94f3cd7d832c28681f1dea54b4dd8606e5e2bc75 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +disks_mutex is intended to serialize md_alloc. Extended it to also cover +the kobject_uevent call and getting the sysfs dirent to help reducing +error handling complexity. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Song Liu +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/md/md.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/md/md.c b/drivers/md/md.c +index e04a23dc46bf..946bbedf5dcf 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -5725,12 +5725,12 @@ static int md_alloc(dev_t dev, char *name) + error = 0; + } + abort: +- mutex_unlock(&disks_mutex); + if (!error && mddev->kobj.sd) { + kobject_uevent(&mddev->kobj, KOBJ_ADD); + mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state"); + mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level"); + } ++ mutex_unlock(&disks_mutex); + mddev_put(mddev); + return error; + } +-- +2.35.3 + diff --git a/patches.suse/md-properly-unwind-when-failing-to-add-the-kobject-i.patch b/patches.suse/md-properly-unwind-when-failing-to-add-the-kobject-i.patch new file mode 100644 index 0000000..ae3a603 --- /dev/null +++ b/patches.suse/md-properly-unwind-when-failing-to-add-the-kobject-i.patch @@ -0,0 +1,86 @@ +From: Christoph Hellwig +Date: Wed, 1 Sep 2021 13:38:33 +0200 +Subject: [PATCH] md: properly unwind when failing to add the kobject in + md_alloc +Git-commit: 7ad1069166c0ccdd572d27e01cc7f7f84477df1e +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Add proper error handling to delete the gendisk when failing to add +the md kobject and clean up the error unwinding in general.
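Condensed, the new control flow is the conventional kernel goto ladder, with unwind labels running in reverse order of setup. A hedged sketch of the shape (names from the hunks below; the surrounding disks_mutex locking is elided):

static int md_alloc_sketch(struct mddev *mddev, struct gendisk *disk)
{
	int error = add_disk(disk);

	if (error)
		goto out_cleanup_disk;

	error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj,
			    "%s", "md");
	if (error)
		goto out_del_gendisk;

	kobject_uevent(&mddev->kobj, KOBJ_ADD);
	return 0;

out_del_gendisk:
	del_gendisk(disk);		/* undo add_disk() */
out_cleanup_disk:
	blk_cleanup_disk(disk);		/* undo blk_alloc_disk() */
	return error;
}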
+ +Signed-off-by: Christoph Hellwig +Signed-off-by: Song Liu +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/md/md.c | 37 +++++++++++++++++-------------------- + 1 file changed, 17 insertions(+), 20 deletions(-) + +diff --git a/drivers/md/md.c b/drivers/md/md.c +index 946bbedf5dcf..c6b15ff0074f 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -5672,7 +5672,7 @@ static int md_alloc(dev_t dev, char *name) + strcmp(mddev2->gendisk->disk_name, name) == 0) { + spin_unlock(&all_mddevs_lock); + error = -EEXIST; +- goto abort; ++ goto out_unlock_disks_mutex; + } + spin_unlock(&all_mddevs_lock); + } +@@ -5685,7 +5685,7 @@ static int md_alloc(dev_t dev, char *name) + error = -ENOMEM; + disk = blk_alloc_disk(NUMA_NO_NODE); + if (!disk) +- goto abort; ++ goto out_unlock_disks_mutex; + + disk->major = MAJOR(mddev->unit); + disk->first_minor = unit << shift; +@@ -5710,26 +5710,23 @@ static int md_alloc(dev_t dev, char *name) + disk->events |= DISK_EVENT_MEDIA_CHANGE; + mddev->gendisk = disk; + error = add_disk(disk); +- if (error) { +- blk_cleanup_disk(disk); +- goto abort; +- } ++ if (error) ++ goto out_cleanup_disk; + + error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md"); +- if (error) { +- /* This isn't possible, but as kobject_init_and_add is marked +- * __must_check, we must do something with the result +- */ +- pr_debug("md: cannot register %s/md - name in use\n", +- disk->disk_name); +- error = 0; +- } +- abort: +- if (!error && mddev->kobj.sd) { +- kobject_uevent(&mddev->kobj, KOBJ_ADD); +- mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state"); +- mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level"); +- } ++ if (error) ++ goto out_del_gendisk; ++ ++ kobject_uevent(&mddev->kobj, KOBJ_ADD); ++ mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state"); ++ mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level"); ++ goto out_unlock_disks_mutex; ++ ++out_del_gendisk: ++ del_gendisk(disk); ++out_cleanup_disk: ++ blk_cleanup_disk(disk); ++out_unlock_disks_mutex: + mutex_unlock(&disks_mutex); + mddev_put(mddev); + return error; +-- +2.35.3 + diff --git a/patches.suse/md-use-bdev_nr_sectors-instead-of-open-coding-it.patch b/patches.suse/md-use-bdev_nr_sectors-instead-of-open-coding-it.patch new file mode 100644 index 0000000..be229f6 --- /dev/null +++ b/patches.suse/md-use-bdev_nr_sectors-instead-of-open-coding-it.patch @@ -0,0 +1,115 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:06 +0200 +Subject: [PATCH] md: use bdev_nr_sectors instead of open coding it +Git-commit: 0fe80347fd701a7261bea278cab692de79e83af7 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. 
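The conversion is mechanical; a one-line sketch of the before/after (assuming the helper lives in the block headers of this kernel and returns the size in 512-byte sectors):

/* Before (open-coded in several md.c call sites):
 *	sector_t sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
 * After (equivalent helper; bdev_nr_bytes() is the byte-count variant
 * used where md wants bytes rather than sectors):
 */
sector_t sectors = bdev_nr_sectors(rdev->bdev);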
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Acked-by: Song Liu +Link: https://lore.kernel.org/r/20211018101130.1838532-7-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/md/md.c | 26 +++++++++++--------------- + 1 file changed, 11 insertions(+), 15 deletions(-) + +diff --git a/drivers/md/md.c b/drivers/md/md.c +index 22310d5d8d41..d964da436383 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -888,8 +888,7 @@ static struct md_personality *find_pers(int level, char *clevel) + /* return the offset of the super block in 512byte sectors */ + static inline sector_t calc_dev_sboffset(struct md_rdev *rdev) + { +- sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512; +- return MD_NEW_SIZE_SECTORS(num_sectors); ++ return MD_NEW_SIZE_SECTORS(bdev_nr_sectors(rdev->bdev)); + } + + static int alloc_disk_sb(struct md_rdev *rdev) +@@ -1631,8 +1630,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ + */ + switch(minor_version) { + case 0: +- sb_start = i_size_read(rdev->bdev->bd_inode) >> 9; +- sb_start -= 8*2; ++ sb_start = bdev_nr_sectors(rdev->bdev) - 8 * 2; + sb_start &= ~(sector_t)(4*2-1); + break; + case 1: +@@ -1787,10 +1785,9 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ + else + ret = 0; + } +- if (minor_version) { +- sectors = (i_size_read(rdev->bdev->bd_inode) >> 9); +- sectors -= rdev->data_offset; +- } else ++ if (minor_version) ++ sectors = bdev_nr_sectors(rdev->bdev) - rdev->data_offset; ++ else + sectors = rdev->sb_start; + if (sectors < le64_to_cpu(sb->data_size)) + return -EINVAL; +@@ -2168,8 +2165,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) + return 0; /* too confusing */ + if (rdev->sb_start < rdev->data_offset) { + /* minor versions 1 and 2; superblock before data */ +- max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9; +- max_sectors -= rdev->data_offset; ++ max_sectors = bdev_nr_sectors(rdev->bdev) - rdev->data_offset; + if (!num_sectors || num_sectors > max_sectors) + num_sectors = max_sectors; + } else if (rdev->mddev->bitmap_info.offset) { +@@ -2178,7 +2174,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) + } else { + /* minor version 0; superblock after data */ + sector_t sb_start, bm_space; +- sector_t dev_size = i_size_read(rdev->bdev->bd_inode) >> 9; ++ sector_t dev_size = bdev_nr_sectors(rdev->bdev); + + /* 8K is for superblock */ + sb_start = dev_size - 8*2; +@@ -3382,7 +3378,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) + if (!sectors) + return -EBUSY; + } else if (!sectors) +- sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) - ++ sectors = bdev_nr_sectors(rdev->bdev) - + rdev->data_offset; + if (!my_mddev->pers->resize) + /* Cannot change size for RAID0 or Linear etc */ +@@ -3709,7 +3705,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe + + kobject_init(&rdev->kobj, &rdev_ktype); + +- size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS; ++ size = bdev_nr_bytes(rdev->bdev) >> BLOCK_SIZE_BITS; + if (!size) { + pr_warn("md: %s has zero or unknown size, marking faulty!\n", + bdevname(rdev->bdev,b)); +@@ -6880,7 +6876,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info) + + if (!mddev->persistent) { + pr_debug("md: nonpersistent superblock ...\n"); +- rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512; ++ rdev->sb_start = bdev_nr_sectors(rdev->bdev); + } else + rdev->sb_start = 
calc_dev_sboffset(rdev); + rdev->sectors = rdev->sb_start; +@@ -6967,7 +6963,7 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev) + if (mddev->persistent) + rdev->sb_start = calc_dev_sboffset(rdev); + else +- rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512; ++ rdev->sb_start = bdev_nr_sectors(rdev->bdev); + + rdev->sectors = rdev->sb_start; + +-- +2.35.3 + diff --git a/patches.suse/mmc-core-Store-pointer-to-bio_crypt_ctx-in-mmc_reque.patch b/patches.suse/mmc-core-Store-pointer-to-bio_crypt_ctx-in-mmc_reque.patch new file mode 100644 index 0000000..56259b6 --- /dev/null +++ b/patches.suse/mmc-core-Store-pointer-to-bio_crypt_ctx-in-mmc_reque.patch @@ -0,0 +1,100 @@ +From: Eric Biggers +Date: Wed, 21 Jul 2021 08:47:38 -0700 +Subject: [PATCH] mmc: core: Store pointer to bio_crypt_ctx in mmc_request +Git-commit: 86c639ce08266ed521974038f0592739fec1c11a +Patch-mainline: v5.15-rc1 +References: jsc#PED-1183 + +Make 'struct mmc_request' contain a pointer to the request's +'struct bio_crypt_ctx' directly, instead of extracting a 32-bit DUN from +it which is a cqhci-crypto specific detail. + +This keeps the cqhci crypto specific details in the cqhci module, and it +makes mmc_core and mmc_block ready for MMC crypto hardware that accepts +the DUN and/or key in a way that is more flexible than that which will +be specified by the eMMC v5.2 standard. Exynos SoCs are an example of +such hardware, as their inline encryption hardware takes keys directly +(it has no concept of keyslots) and supports 128-bit DUNs. + +Note that the 32-bit DUN length specified by the standard is very +restrictive, so it is likely that more hardware will support longer DUNs +despite it not following the standard. Thus, limiting the scope of the +32-bit DUN assumption to the place that actually needs it is warranted. + +Signed-off-by: Eric Biggers +Link: https://lore.kernel.org/r/20210721154738.3966463-1-ebiggers@kernel.org +Signed-off-by: Ulf Hansson +Acked-by: Hannes Reinecke +--- + drivers/mmc/core/crypto.c | 15 ++++----------- + drivers/mmc/host/cqhci-crypto.h | 7 +++++-- + include/linux/mmc/core.h | 3 +-- + 3 files changed, 10 insertions(+), 15 deletions(-) + +diff --git a/drivers/mmc/core/crypto.c b/drivers/mmc/core/crypto.c +index 419a368f8402..67557808cada 100644 +--- a/drivers/mmc/core/crypto.c ++++ b/drivers/mmc/core/crypto.c +@@ -31,18 +31,11 @@ void mmc_crypto_prepare_req(struct mmc_queue_req *mqrq) + struct request *req = mmc_queue_req_to_req(mqrq); + struct mmc_request *mrq = &mqrq->brq.mrq; + +- if (!req->crypt_keyslot) ++ if (!req->crypt_ctx) + return; + +- mrq->crypto_enabled = true; +- mrq->crypto_key_slot = blk_ksm_get_slot_idx(req->crypt_keyslot); +- +- /* +- * For now we assume that all MMC drivers set max_dun_bytes_supported=4, +- * which is the limit for CQHCI crypto. So all DUNs should be 32-bit. 
+- */ +- WARN_ON_ONCE(req->crypt_ctx->bc_dun[0] > U32_MAX); +- +- mrq->data_unit_num = req->crypt_ctx->bc_dun[0]; ++ mrq->crypto_ctx = req->crypt_ctx; ++ if (req->crypt_keyslot) ++ mrq->crypto_key_slot = blk_ksm_get_slot_idx(req->crypt_keyslot); + } + EXPORT_SYMBOL_GPL(mmc_crypto_prepare_req); +diff --git a/drivers/mmc/host/cqhci-crypto.h b/drivers/mmc/host/cqhci-crypto.h +index 60b58ee0e625..d7fb084f563b 100644 +--- a/drivers/mmc/host/cqhci-crypto.h ++++ b/drivers/mmc/host/cqhci-crypto.h +@@ -22,12 +22,15 @@ int cqhci_crypto_init(struct cqhci_host *host); + */ + static inline u64 cqhci_crypto_prep_task_desc(struct mmc_request *mrq) + { +- if (!mrq->crypto_enabled) ++ if (!mrq->crypto_ctx) + return 0; + ++ /* We set max_dun_bytes_supported=4, so all DUNs should be 32-bit. */ ++ WARN_ON_ONCE(mrq->crypto_ctx->bc_dun[0] > U32_MAX); ++ + return CQHCI_CRYPTO_ENABLE_BIT | + CQHCI_CRYPTO_KEYSLOT(mrq->crypto_key_slot) | +- mrq->data_unit_num; ++ mrq->crypto_ctx->bc_dun[0]; + } + + #else /* CONFIG_MMC_CRYPTO */ +diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h +index ab19245e9945..71101d1ec825 100644 +--- a/include/linux/mmc/core.h ++++ b/include/linux/mmc/core.h +@@ -164,9 +164,8 @@ struct mmc_request { + int tag; + + #ifdef CONFIG_MMC_CRYPTO +- bool crypto_enabled; ++ const struct bio_crypt_ctx *crypto_ctx; + int crypto_key_slot; +- u32 data_unit_num; + #endif + }; + +-- +2.35.3 + diff --git a/patches.suse/msft-hv-2526-Drivers-hv-vmbus-Rework-use-of-DMA_BIT_MASK-64.patch b/patches.suse/msft-hv-2526-Drivers-hv-vmbus-Rework-use-of-DMA_BIT_MASK-64.patch new file mode 100644 index 0000000..efc0c61 --- /dev/null +++ b/patches.suse/msft-hv-2526-Drivers-hv-vmbus-Rework-use-of-DMA_BIT_MASK-64.patch @@ -0,0 +1,64 @@ +From: Michael Kelley +Date: Sun, 6 Feb 2022 11:36:56 -0800 +Patch-mainline: v5.17-rc5 +Subject: Drivers: hv: vmbus: Rework use of DMA_BIT_MASK(64) +Git-commit: 6bf625a4140f24b490766043b307f8252519578b +References: git-fixes + +Using DMA_BIT_MASK(64) as an initializer for a global variable +causes problems with Clang 12.0.1. The compiler doesn't understand +that value 64 is excluded from the shift at compile time, resulting +in a build error. + +While this is a compiler problem, avoid the issue by setting up +the dma_mask memory as part of struct hv_device, and initialize +it using dma_set_mask(). 
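Background, hedged: DMA_BIT_MASK(n) special-cases n == 64 precisely to avoid an undefined 64-bit shift, roughly #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1)) in include/linux/dma-mapping.h, but the affected Clang version still diagnosed the shift arm when the macro appeared in a file-scope initializer. Doing the assignment at runtime, per device, avoids the constant-expression context entirely; a sketch of the pattern the patch switches to:

/* Sketch: per-device runtime setup instead of a global initializer. */
static void hv_dev_init_dma_mask(struct hv_device *child)
{
	/* the mask storage now lives in struct hv_device itself */
	child->device.dma_mask = &child->dma_mask;
	dma_set_mask(&child->device, DMA_BIT_MASK(64));	/* runtime, not static init */
}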
+ +Reported-by: Nathan Chancellor +Reported-by: Vitaly Chikunov +Reported-by: Jakub Kicinski +Fixes: 743b237c3a7b ("scsi: storvsc: Add Isolation VM support for storvsc driver") +Signed-off-by: Michael Kelley +Reviewed-by: Nathan Chancellor +Tested-by: Nathan Chancellor +Link: https://lore.kernel.org/r/1644176216-12531-1-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +Acked-by: Olaf Hering +--- + drivers/hv/vmbus_drv.c | 4 ++-- + include/linux/hyperv.h | 1 + + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -2082,7 +2082,6 @@ struct hv_device *vmbus_device_create(const guid_t *type, + return child_device_obj; + } + +-static u64 vmbus_dma_mask = DMA_BIT_MASK(64); + /* + * vmbus_device_register - Register the child device + */ +@@ -2123,8 +2122,9 @@ int vmbus_device_register(struct hv_device *child_device_obj) + } + hv_debug_add_dev_dir(child_device_obj); + +- child_device_obj->device.dma_mask = &vmbus_dma_mask; + child_device_obj->device.dma_parms = &child_device_obj->dma_parms; ++ child_device_obj->device.dma_mask = &child_device_obj->dma_mask; ++ dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); + return 0; + + err_kset_unregister: +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1262,6 +1262,7 @@ struct hv_device { + struct vmbus_channel *channel; + struct kset *channels_kset; + struct device_dma_parameters dma_parms; ++ u64 dma_mask; + + /* place holder to keep track of the dir for hv device in debugfs */ + struct dentry *debug_dir; diff --git a/patches.suse/msft-hv-2555-Drivers-hv-vmbus-Fix-initialization-of-device-object.patch b/patches.suse/msft-hv-2555-Drivers-hv-vmbus-Fix-initialization-of-device-object.patch index 7ac07c9..c5e9399 100644 --- a/patches.suse/msft-hv-2555-Drivers-hv-vmbus-Fix-initialization-of-device-object.patch +++ b/patches.suse/msft-hv-2555-Drivers-hv-vmbus-Fix-initialization-of-device-object.patch @@ -50,22 +50,24 @@ Acked-by: Olaf Hering diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c -@@ -2103,6 +2103,9 @@ int vmbus_device_register(struct hv_device *child_device_obj) +@@ -2103,6 +2103,10 @@ int vmbus_device_register(struct hv_device *child_device_obj) child_device_obj->device.parent = &hv_acpi_dev->dev; child_device_obj->device.release = vmbus_device_release; -+ child_device_obj->device.dma_mask = &vmbus_dma_mask; + child_device_obj->device.dma_parms = &child_device_obj->dma_parms; ++ child_device_obj->device.dma_mask = &child_device_obj->dma_mask; ++ dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); + /* * Register with the LDM. 
This will kick off the driver/device * binding...which will eventually call vmbus_match() and vmbus_probe() -@@ -2128,8 +2132,6 @@ int vmbus_device_register(struct hv_device *child_device_obj) +@@ -2128,9 +2132,6 @@ int vmbus_device_register(struct hv_device *child_device_obj) } hv_debug_add_dev_dir(child_device_obj); -- child_device_obj->device.dma_mask = &vmbus_dma_mask; - child_device_obj->device.dma_parms = &child_device_obj->dma_parms; +- child_device_obj->device.dma_mask = &child_device_obj->dma_mask; +- dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); return 0; err_kset_unregister: diff --git a/patches.suse/msft-hv-2585-Drivers-hv-vmbus-Refactor-the-ring-buffer-iterator-f.patch b/patches.suse/msft-hv-2585-Drivers-hv-vmbus-Refactor-the-ring-buffer-iterator-f.patch new file mode 100644 index 0000000..1dbe40b --- /dev/null +++ b/patches.suse/msft-hv-2585-Drivers-hv-vmbus-Refactor-the-ring-buffer-iterator-f.patch @@ -0,0 +1,156 @@ +From: "Andrea Parri (Microsoft)" +Date: Thu, 28 Apr 2022 16:51:07 +0200 +Patch-mainline: v5.19-rc1 +Subject: Drivers: hv: vmbus: Refactor the ring-buffer iterator functions +Git-commit: 1c9de08f7f952b4101f092802581344033d84429 +References: git-fixes + +With no users of hv_pkt_iter_next_raw() and no "external" users of +hv_pkt_iter_first_raw(), the iterator functions can be refactored +and simplified to remove some indirection/code. + +Signed-off-by: Andrea Parri (Microsoft) +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20220428145107.7878-6-parri.andrea@gmail.com +Signed-off-by: Wei Liu +Acked-by: Olaf Hering +--- + drivers/hv/ring_buffer.c | 32 ++++++-------------- + include/linux/hyperv.h | 35 +++------------------- + 2 files changed, 13 insertions(+), 54 deletions(-) + +diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c +--- a/drivers/hv/ring_buffer.c ++++ b/drivers/hv/ring_buffer.c +@@ -429,7 +429,7 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, + memcpy(buffer, (const char *)desc + offset, packetlen); + + /* Advance ring index to next packet descriptor */ +- __hv_pkt_iter_next(channel, desc, true); ++ __hv_pkt_iter_next(channel, desc); + + /* Notify host of update */ + hv_pkt_iter_close(channel); +@@ -464,22 +464,6 @@ static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi) + return (rbi->ring_datasize - priv_read_loc) + write_loc; + } + +-/* +- * Get first vmbus packet without copying it out of the ring buffer +- */ +-struct vmpacket_descriptor *hv_pkt_iter_first_raw(struct vmbus_channel *channel) +-{ +- struct hv_ring_buffer_info *rbi = &channel->inbound; +- +- hv_debug_delay_test(channel, MESSAGE_DELAY); +- +- if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor)) +- return NULL; +- +- return (struct vmpacket_descriptor *)(hv_get_ring_buffer(rbi) + rbi->priv_read_index); +-} +-EXPORT_SYMBOL_GPL(hv_pkt_iter_first_raw); +- + /* + * Get first vmbus packet from ring buffer after read_index + * +@@ -491,11 +475,14 @@ struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel) + struct vmpacket_descriptor *desc, *desc_copy; + u32 bytes_avail, pkt_len, pkt_offset; + +- desc = hv_pkt_iter_first_raw(channel); +- if (!desc) ++ hv_debug_delay_test(channel, MESSAGE_DELAY); ++ ++ bytes_avail = hv_pkt_iter_avail(rbi); ++ if (bytes_avail < sizeof(struct vmpacket_descriptor)) + return NULL; ++ bytes_avail = min(rbi->pkt_buffer_size, bytes_avail); + +- bytes_avail = min(rbi->pkt_buffer_size, hv_pkt_iter_avail(rbi)); ++ desc = (struct vmpacket_descriptor *)(hv_get_ring_buffer(rbi) + 
rbi->priv_read_index); + + /* + * Ensure the compiler does not use references to incoming Hyper-V values (which +@@ -542,8 +529,7 @@ EXPORT_SYMBOL_GPL(hv_pkt_iter_first); + */ + struct vmpacket_descriptor * + __hv_pkt_iter_next(struct vmbus_channel *channel, +- const struct vmpacket_descriptor *desc, +- bool copy) ++ const struct vmpacket_descriptor *desc) + { + struct hv_ring_buffer_info *rbi = &channel->inbound; + u32 packetlen = desc->len8 << 3; +@@ -556,7 +542,7 @@ __hv_pkt_iter_next(struct vmbus_channel *channel, + rbi->priv_read_index -= dsize; + + /* more data? */ +- return copy ? hv_pkt_iter_first(channel) : hv_pkt_iter_first_raw(channel); ++ return hv_pkt_iter_first(channel); + } + EXPORT_SYMBOL_GPL(__hv_pkt_iter_next); + +diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h +--- a/include/linux/hyperv.h ++++ b/include/linux/hyperv.h +@@ -1706,55 +1706,28 @@ static inline u32 hv_pkt_len(const struct vmpacket_descriptor *desc) + return desc->len8 << 3; + } + +-struct vmpacket_descriptor * +-hv_pkt_iter_first_raw(struct vmbus_channel *channel); +- + struct vmpacket_descriptor * + hv_pkt_iter_first(struct vmbus_channel *channel); + + struct vmpacket_descriptor * + __hv_pkt_iter_next(struct vmbus_channel *channel, +- const struct vmpacket_descriptor *pkt, +- bool copy); ++ const struct vmpacket_descriptor *pkt); + + void hv_pkt_iter_close(struct vmbus_channel *channel); + + static inline struct vmpacket_descriptor * +-hv_pkt_iter_next_pkt(struct vmbus_channel *channel, +- const struct vmpacket_descriptor *pkt, +- bool copy) ++hv_pkt_iter_next(struct vmbus_channel *channel, ++ const struct vmpacket_descriptor *pkt) + { + struct vmpacket_descriptor *nxt; + +- nxt = __hv_pkt_iter_next(channel, pkt, copy); ++ nxt = __hv_pkt_iter_next(channel, pkt); + if (!nxt) + hv_pkt_iter_close(channel); + + return nxt; + } + +-/* +- * Get next packet descriptor without copying it out of the ring buffer +- * If at end of list, return NULL and update host. +- */ +-static inline struct vmpacket_descriptor * +-hv_pkt_iter_next_raw(struct vmbus_channel *channel, +- const struct vmpacket_descriptor *pkt) +-{ +- return hv_pkt_iter_next_pkt(channel, pkt, false); +-} +- +-/* +- * Get next packet descriptor from iterator +- * If at end of list, return NULL and update host. +- */ +-static inline struct vmpacket_descriptor * +-hv_pkt_iter_next(struct vmbus_channel *channel, +- const struct vmpacket_descriptor *pkt) +-{ +- return hv_pkt_iter_next_pkt(channel, pkt, true); +-} +- + #define foreach_vmbus_pkt(pkt, channel) \ + for (pkt = hv_pkt_iter_first(channel); pkt; \ + pkt = hv_pkt_iter_next(channel, pkt)) diff --git a/patches.suse/msft-hv-2613-hv_balloon-Fix-balloon_probe-and-balloon_remove-erro.patch b/patches.suse/msft-hv-2613-hv_balloon-Fix-balloon_probe-and-balloon_remove-erro.patch new file mode 100644 index 0000000..e9298bc --- /dev/null +++ b/patches.suse/msft-hv-2613-hv_balloon-Fix-balloon_probe-and-balloon_remove-erro.patch @@ -0,0 +1,75 @@ +From: Shradha Gupta +Date: Sun, 15 May 2022 21:50:58 -0700 +Patch-mainline: v5.19-rc1 +Subject: hv_balloon: Fix balloon_probe() and balloon_remove() error handling +Git-commit: d27423bf048dcb5e15f04286d001c66685e30c29 +References: git-fixes + +Add missing cleanup in balloon_probe() if the call to +balloon_connect_vsp() fails. Also correctly handle cleanup in +balloon_remove() when dm_state is DM_INIT_ERROR because +balloon_resume() failed. 
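Reduced to its core, the remove-path change is a state guard so teardown is not repeated after a failed resume has already performed it; a sketch mirroring the hunk below:

/* Skip teardown that the balloon_resume() error path already did. */
if (dm_device.state != DM_INIT_ERROR) {
	disable_page_reporting();
	vmbus_close(dev->channel);
#ifdef CONFIG_MEMORY_HOTPLUG
	unregister_memory_notifier(&hv_memory_nb);
	restore_online_page_callback(&hv_online_page);
#endif
}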
+ +Signed-off-by: Shradha Gupta +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20220516045058.GA7933@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net +Signed-off-by: Wei Liu +Acked-by: Olaf Hering +--- + drivers/hv/hv_balloon.c | 21 ++++++++++++++++----- + 1 file changed, 16 insertions(+), 5 deletions(-) + +diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c +--- a/drivers/hv/hv_balloon.c ++++ b/drivers/hv/hv_balloon.c +@@ -1842,7 +1842,7 @@ static int balloon_probe(struct hv_device *dev, + + ret = balloon_connect_vsp(dev); + if (ret != 0) +- return ret; ++ goto connect_error; + + enable_page_reporting(); + dm_device.state = DM_INITIALIZED; +@@ -1861,6 +1861,7 @@ probe_error: + dm_device.thread = NULL; + disable_page_reporting(); + vmbus_close(dev->channel); ++connect_error: + #ifdef CONFIG_MEMORY_HOTPLUG + unregister_memory_notifier(&hv_memory_nb); + restore_online_page_callback(&hv_online_page); +@@ -1882,12 +1883,21 @@ static int balloon_remove(struct hv_device *dev) + cancel_work_sync(&dm->ha_wrk.wrk); + + kthread_stop(dm->thread); +- disable_page_reporting(); +- vmbus_close(dev->channel); ++ ++ /* ++ * This is to handle the case when balloon_resume() ++ * call has failed and some cleanup has been done as ++ * a part of the error handling. ++ */ ++ if (dm_device.state != DM_INIT_ERROR) { ++ disable_page_reporting(); ++ vmbus_close(dev->channel); + #ifdef CONFIG_MEMORY_HOTPLUG +- unregister_memory_notifier(&hv_memory_nb); +- restore_online_page_callback(&hv_online_page); ++ unregister_memory_notifier(&hv_memory_nb); ++ restore_online_page_callback(&hv_online_page); + #endif ++ } ++ + spin_lock_irqsave(&dm_device.ha_lock, flags); + list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) { + list_for_each_entry_safe(gap, tmp_gap, &has->gap_list, list) { +@@ -1948,6 +1958,7 @@ close_channel: + vmbus_close(dev->channel); + out: + dm_device.state = DM_INIT_ERROR; ++ disable_page_reporting(); + #ifdef CONFIG_MEMORY_HOTPLUG + unregister_memory_notifier(&hv_memory_nb); + restore_online_page_callback(&hv_online_page); diff --git a/patches.suse/msft-hv-2614-drm-hyperv-Removing-the-restruction-of-VRAM-allocati.patch b/patches.suse/msft-hv-2614-drm-hyperv-Removing-the-restruction-of-VRAM-allocati.patch new file mode 100644 index 0000000..1239f0e --- /dev/null +++ b/patches.suse/msft-hv-2614-drm-hyperv-Removing-the-restruction-of-VRAM-allocati.patch @@ -0,0 +1,130 @@ +From: Saurabh Sengar +Date: Sat, 21 May 2022 07:23:39 -0700 +Patch-mainline: v6.0-rc1 +Subject: drm/hyperv : Removing the restruction of VRAM allocation with PCI bar size +Git-commit: a0ab5abced550ddeefddb06055ed60779a54eb79 +References: git-fixes + +There were two different approaches used in this driver to +allocate vram: + 1. VRAM allocation from PCI region for Gen1 + 2. VRAM allocation from MMIO region for Gen2 +The first approach limits the vram to the PCI BAR size, which is 64 MB on most +legacy systems. This restricts the maximum resolution to what fits in +64 MB, and following the recent conclusion on the fbdev issue it was decided to use +a similar allocation strategy for both Gen1 and Gen2. This patch unifies +the Gen1 and Gen2 vram allocation strategy.
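For context, a hedged sketch of the retained MMIO-based path: the renamed hyperv_setup_vram() body is not part of this hunk, so the arguments below, including the vmbus_allocate_mmio() signature, are assumptions based on the Gen2 code the function was renamed from.

/* Hedged sketch: carve VRAM out of VMBus MMIO space, not PCI BAR 0. */
static int hyperv_setup_vram_sketch(struct hyperv_drm_device *hv,
				    struct hv_device *hdev)
{
	int ret;

	ret = vmbus_allocate_mmio(&hv->mem, hdev, 0, -1, hv->fb_size,
				  0x100000, true);
	if (ret)
		return -ENOMEM;

	hv->fb_base = hv->mem->start;
	hv->vram = ioremap_cache(hv->fb_base, hv->fb_size);
	if (!hv->vram) {
		vmbus_free_mmio(hv->mem->start, hv->fb_size);
		return -ENOMEM;
	}
	return 0;
}

This is also why the remove path can now call vmbus_free_mmio() unconditionally, as the second hunk below shows.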
+ +Signed-off-by: Saurabh Sengar +Reviewed-by: Deepak Rawat +Signed-off-by: Deepak Rawat +Link: https://patchwork.freedesktop.org/patch/msgid/1653143019-20032-1-git-send-email-ssengar@linux.microsoft.com +Acked-by: Olaf Hering +--- + drivers/gpu/drm/hyperv/hyperv_drm_drv.c | 74 +--------------------- + 1 file changed, 3 insertions(+), 71 deletions(-) + +diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +--- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c ++++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +@@ -69,56 +69,7 @@ static struct pci_driver hyperv_pci_driver = { + .remove = hyperv_pci_remove, + }; + +-static int hyperv_setup_gen1(struct hyperv_drm_device *hv) +-{ +- struct drm_device *dev = &hv->dev; +- struct pci_dev *pdev; +- int ret; +- +- pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT, +- PCI_DEVICE_ID_HYPERV_VIDEO, NULL); +- if (!pdev) { +- drm_err(dev, "Unable to find PCI Hyper-V video\n"); +- return -ENODEV; +- } +- +- ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &hyperv_driver); +- if (ret) { +- drm_err(dev, "Not able to remove boot fb\n"); +- return ret; +- } +- +- if (pci_request_region(pdev, 0, DRIVER_NAME) != 0) +- drm_warn(dev, "Cannot request framebuffer, boot fb still active?\n"); +- +- if ((pdev->resource[0].flags & IORESOURCE_MEM) == 0) { +- drm_err(dev, "Resource at bar 0 is not IORESOURCE_MEM\n"); +- ret = -ENODEV; +- goto error; +- } +- +- hv->fb_base = pci_resource_start(pdev, 0); +- hv->fb_size = pci_resource_len(pdev, 0); +- if (!hv->fb_base) { +- drm_err(dev, "Resource not available\n"); +- ret = -ENODEV; +- goto error; +- } +- +- hv->fb_size = min(hv->fb_size, +- (unsigned long)(hv->mmio_megabytes * 1024 * 1024)); +- hv->vram = devm_ioremap(&pdev->dev, hv->fb_base, hv->fb_size); +- if (!hv->vram) { +- drm_err(dev, "Failed to map vram\n"); +- ret = -ENOMEM; +- } +- +-error: +- pci_dev_put(pdev); +- return ret; +-} +- +-static int hyperv_setup_gen2(struct hyperv_drm_device *hv, ++static int hyperv_setup_vram(struct hyperv_drm_device *hv, + struct hv_device *hdev) + { + struct drm_device *dev = &hv->dev; +@@ -181,10 +132,7 @@ static int hyperv_vmbus_probe(struct hv_device *hdev, + goto err_hv_set_drv_data; + } + +- if (efi_enabled(EFI_BOOT)) +- ret = hyperv_setup_gen2(hv, hdev); +- else +- ret = hyperv_setup_gen1(hv); ++ ret = hyperv_setup_vram(hv, hdev); + + if (ret) + goto err_vmbus_close; +@@ -225,29 +173,13 @@ static int hyperv_vmbus_remove(struct hv_device *hdev) + { + struct drm_device *dev = hv_get_drvdata(hdev); + struct hyperv_drm_device *hv = to_hv(dev); +- struct pci_dev *pdev; + + drm_dev_unplug(dev); + drm_atomic_helper_shutdown(dev); + vmbus_close(hdev->channel); + hv_set_drvdata(hdev, NULL); + +- /* +- * Free allocated MMIO memory only on Gen2 VMs. 
+- * On Gen1 VMs, release the PCI device +- */ +- if (efi_enabled(EFI_BOOT)) { +- vmbus_free_mmio(hv->mem->start, hv->fb_size); +- } else { +- pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT, +- PCI_DEVICE_ID_HYPERV_VIDEO, NULL); +- if (!pdev) { +- drm_err(dev, "Unable to find PCI Hyper-V video\n"); +- return -ENODEV; +- } +- pci_release_region(pdev, 0); +- pci_dev_put(pdev); +- } ++ vmbus_free_mmio(hv->mem->start, hv->fb_size); + + return 0; + } diff --git a/patches.suse/msft-hv-2620-HID-hyperv-Correctly-access-fields-declared-as-__le1.patch b/patches.suse/msft-hv-2620-HID-hyperv-Correctly-access-fields-declared-as-__le1.patch new file mode 100644 index 0000000..c199ba8 --- /dev/null +++ b/patches.suse/msft-hv-2620-HID-hyperv-Correctly-access-fields-declared-as-__le1.patch @@ -0,0 +1,43 @@ +From: Michael Kelley +Date: Tue, 7 Jun 2022 20:49:37 -0700 +Patch-mainline: v5.19-rc3 +Subject: HID: hyperv: Correctly access fields declared as __le16 +Git-commit: f5f93d7f5a5cbfef02609dead21e7056e83f4fab +References: git-fixes + +Add the use of le16_to_cpu() for fields declared as __le16. Because +Hyper-V only runs in Little Endian mode, there's no actual bug. +The change is made in the interest of general correctness in +addition to making sparse happy. No functional change. + +Reported-by: kernel test robot +Signed-off-by: Michael Kelley +Link: https://lore.kernel.org/r/1654660177-115463-1-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +Acked-by: Olaf Hering +--- + drivers/hid/hid-hyperv.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c +--- a/drivers/hid/hid-hyperv.c ++++ b/drivers/hid/hid-hyperv.c +@@ -199,7 +199,8 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device, + if (!input_device->hid_desc) + goto cleanup; + +- input_device->report_desc_size = desc->desc[0].wDescriptorLength; ++ input_device->report_desc_size = le16_to_cpu( ++ desc->desc[0].wDescriptorLength); + if (input_device->report_desc_size == 0) { + input_device->dev_info_status = -EINVAL; + goto cleanup; +@@ -217,7 +218,7 @@ static void mousevsc_on_receive_device_info(struct mousevsc_dev *input_device, + + memcpy(input_device->report_desc, + ((unsigned char *)desc) + desc->bLength, +- desc->desc[0].wDescriptorLength); ++ le16_to_cpu(desc->desc[0].wDescriptorLength)); + + /* Send the ack */ + memset(&ack, 0, sizeof(struct mousevsc_prt_msg)); diff --git a/patches.suse/msft-hv-2625-scsi-storvsc-Correct-reporting-of-Hyper-V-I-O-size-l.patch b/patches.suse/msft-hv-2625-scsi-storvsc-Correct-reporting-of-Hyper-V-I-O-size-l.patch new file mode 100644 index 0000000..ba05c62 --- /dev/null +++ b/patches.suse/msft-hv-2625-scsi-storvsc-Correct-reporting-of-Hyper-V-I-O-size-l.patch @@ -0,0 +1,96 @@ +From: Saurabh Sengar +Date: Tue, 14 Jun 2022 00:05:55 -0700 +Patch-mainline: v5.19-rc4 +Subject: scsi: storvsc: Correct reporting of Hyper-V I/O size limits +Git-commit: 1d3e0980782fbafaf93285779fd3905e4f866802 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +References: git-fixes + +Current code is based on the idea that the max number of SGL entries +also determines the max size of an I/O request. While this idea was +true in older versions of the storvsc driver when SGL entry length +was limited to 4 Kbytes, commit 3d9c3dcc58e9 ("scsi: storvsc: Enable +scatterlist entry lengths > 4Kbytes") removed that limitation. 
It's +now theoretically possible for the block layer to send requests that +exceed the maximum size supported by Hyper-V. This problem doesn't +currently happen in practice because the block layer defaults to a +512 Kbyte maximum, while Hyper-V in Azure supports 2 Mbyte I/O sizes. +But some future configuration of Hyper-V could have a smaller max I/O +size, and the block layer could exceed that max. + +Fix this by correctly setting max_sectors as well as sg_tablesize to +reflect the maximum I/O size that Hyper-V reports. While allowing +I/O sizes larger than the block layer default of 512 Kbytes doesn’t +provide any noticeable performance benefit in the tests we ran, it's +still appropriate to report the correct underlying Hyper-V capabilities +to the Linux block layer. + +Also tweak the virt_boundary_mask to reflect that the required +alignment derives from Hyper-V communication using a 4 Kbyte page size, +and not on the guest page size, which might be bigger (eg. ARM64). + +Link: https://lore.kernel.org/r/1655190355-28722-1-git-send-email-ssengar@linux.microsoft.com +Fixes: 3d9c3dcc58e9 ("scsi: storvsc: Enable scatter list entry lengths > 4Kbytes") +Reviewed-by: Michael Kelley +Signed-off-by: Saurabh Sengar +Signed-off-by: Martin K. Petersen +Acked-by: Olaf Hering +--- + drivers/scsi/storvsc_drv.c | 27 ++++++++++++++++++---- + 1 file changed, 22 insertions(+), 5 deletions(-) + +diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c +--- a/drivers/scsi/storvsc_drv.c ++++ b/drivers/scsi/storvsc_drv.c +@@ -1844,7 +1844,7 @@ static struct scsi_host_template scsi_driver = { + .cmd_per_lun = 2048, + .this_id = -1, + /* Ensure there are no gaps in presented sgls */ +- .virt_boundary_mask = PAGE_SIZE-1, ++ .virt_boundary_mask = HV_HYP_PAGE_SIZE - 1, + .no_write_same = 1, + .track_queue_depth = 1, + .change_queue_depth = storvsc_change_queue_depth, +@@ -1895,6 +1895,7 @@ static int storvsc_probe(struct hv_device *device, + int max_targets; + int max_channels; + int max_sub_channels = 0; ++ u32 max_xfer_bytes; + + /* + * Based on the windows host we are running on, +@@ -1968,12 +1969,28 @@ static int storvsc_probe(struct hv_device *device, + } + /* max cmd length */ + host->max_cmd_len = STORVSC_MAX_CMD_LEN; +- + /* +- * set the table size based on the info we got +- * from the host. ++ * Any reasonable Hyper-V configuration should provide ++ * max_transfer_bytes value aligning to HV_HYP_PAGE_SIZE, ++ * protecting it from any weird value. ++ */ ++ max_xfer_bytes = round_down(stor_device->max_transfer_bytes, HV_HYP_PAGE_SIZE); ++ /* max_hw_sectors_kb */ ++ host->max_sectors = max_xfer_bytes >> 9; ++ /* ++ * There are 2 requirements for Hyper-V storvsc sgl segments, ++ * based on which the below calculation for max segments is ++ * done: ++ * ++ * 1. Except for the first and last sgl segment, all sgl segments ++ * should be align to HV_HYP_PAGE_SIZE, that also means the ++ * maximum number of segments in a sgl can be calculated by ++ * dividing the total max transfer length by HV_HYP_PAGE_SIZE. ++ * ++ * 2. Except for the first and last, each entry in the SGL must ++ * have an offset that is a multiple of HV_HYP_PAGE_SIZE. + */ +- host->sg_tablesize = (stor_device->max_transfer_bytes >> PAGE_SHIFT); ++ host->sg_tablesize = (max_xfer_bytes >> HV_HYP_PAGE_SHIFT) + 1; + /* + * For non-IDE disks, the host supports multiple channels. + * Set the number of HW queues we are supporting. 
diff --git a/patches.suse/msft-hv-2626-drm-hyperv-drm-Include-framebuffer-and-EDID-headers.patch b/patches.suse/msft-hv-2626-drm-hyperv-drm-Include-framebuffer-and-EDID-headers.patch new file mode 100644 index 0000000..2026833 --- /dev/null +++ b/patches.suse/msft-hv-2626-drm-hyperv-drm-Include-framebuffer-and-EDID-headers.patch @@ -0,0 +1,61 @@ +From: Thomas Zimmermann +Date: Wed, 22 Jun 2022 10:34:13 +0200 +Patch-mainline: v6.0-rc1 +Subject: drm/hyperv-drm: Include framebuffer and EDID headers +Git-commit: 009a3a52791f31c57d755a73f6bc66fbdd8bd76c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +References: git-fixes + +Fix a number of compile errors by including the correct header +files. Examples are shown below. + + ../drivers/gpu/drm/hyperv/hyperv_drm_modeset.c: In function 'hyperv_blit_to_vram_rect': + ../drivers/gpu/drm/hyperv/hyperv_drm_modeset.c:25:48: error: invalid use of undefined type 'struct drm_framebuffer' + 25 | struct hyperv_drm_device *hv = to_hv(fb->dev); + | ^~ + + ../drivers/gpu/drm/hyperv/hyperv_drm_modeset.c: In function 'hyperv_connector_get_modes': + ../drivers/gpu/drm/hyperv/hyperv_drm_modeset.c:59:17: error: implicit declaration of function 'drm_add_modes_noedid' [-Werror=implicit-function-declaration] + 59 | count = drm_add_modes_noedid(connector, + | ^~~~~~~~~~~~~~~~~~~~ + + ../drivers/gpu/drm/hyperv/hyperv_drm_modeset.c:62:9: error: implicit declaration of function 'drm_set_preferred_mode'; did you mean 'drm_mm_reserve_node'? [-Werror=implicit-function-declaration] + 62 | drm_set_preferred_mode(connector, hv->preferred_width, + | ^~~~~~~~~~~~~~~~~~~~~~ + +Signed-off-by: Thomas Zimmermann +Fixes: 76c56a5affeb ("drm/hyperv: Add DRM driver for hyperv synthetic video device") +Fixes: 720cf96d8fec ("drm: Drop drm_framebuffer.h from drm_crtc.h") +Fixes: 255490f9150d ("drm: Drop drm_edid.h from drm_crtc.h") +Cc: Deepak Rawat +Cc: Thomas Zimmermann +Cc: Maarten Lankhorst +Cc: Maxime Ripard +Cc: linux-hyperv@vger.kernel.org +Cc: dri-devel@lists.freedesktop.org +Cc: <stable@vger.kernel.org> # v5.14+ +Acked-by: Maxime Ripard +Reviewed-by: Ville Syrjälä +Link: https://patchwork.freedesktop.org/patch/msgid/20220622083413.12573-1-tzimmermann@suse.de +Acked-by: Olaf Hering +--- + drivers/gpu/drm/hyperv/hyperv_drm_modeset.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c +--- a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c ++++ b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c +@@ -7,9 +7,11 @@ + + #include <drm/drm_damage_helper.h> + #include <drm/drm_drv.h> ++#include <drm/drm_edid.h> + #include <drm/drm_fb_helper.h> + #include <drm/drm_format_helper.h> + #include <drm/drm_fourcc.h> ++#include <drm/drm_framebuffer.h> + #include <drm/drm_gem_atomic_helper.h> + #include <drm/drm_gem_framebuffer_helper.h> + #include <drm/drm_gem_shmem_helper.h> diff --git a/patches.suse/msft-hv-2630-Drivers-hv-vm_bus-Handle-vmbus-rescind-calls-after-v.patch b/patches.suse/msft-hv-2630-Drivers-hv-vm_bus-Handle-vmbus-rescind-calls-after-v.patch new file mode 100644 index 0000000..dc54406 --- /dev/null +++ b/patches.suse/msft-hv-2630-Drivers-hv-vm_bus-Handle-vmbus-rescind-calls-after-v.patch @@ -0,0 +1,131 @@ +From: Shradha Gupta +Date: Sun, 10 Jul 2022 21:11:47 -0700 +Patch-mainline: v6.0-rc1 +Subject: Drivers: hv: vm_bus: Handle vmbus rescind calls after vmbus is suspended +Git-commit: 52be93558a9b32f5294750c1394d81e31fe11d6d +References: git-fixes + +Add a flag to indicate that the vmbus is suspended, so we should ignore +any offer message. Add a new work_queue for rescind messages, so we can drain +it along with the other offer work_queues upon suspension.
+It was observed that in some hibernation-related scenario testing, after +vmbus_bus_suspend() we get a rescind offer message for the vmbus. This would +lead to processing of a rescind message for a channel that has already been +suspended. + +Signed-off-by: Shradha Gupta +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20220711041147.GA5569@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net +Signed-off-by: Wei Liu +Acked-by: Olaf Hering +--- + drivers/hv/connection.c | 11 +++++++++ + drivers/hv/hyperv_vmbus.h | 7 ++++++ + drivers/hv/vmbus_drv.c | 27 +++++++++++++++------- + 3 files changed, 37 insertions(+), 8 deletions(-) + +diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c +--- a/drivers/hv/connection.c ++++ b/drivers/hv/connection.c +@@ -171,6 +171,14 @@ int vmbus_connect(void) + goto cleanup; + } + ++ vmbus_connection.rescind_work_queue = ++ create_workqueue("hv_vmbus_rescind"); ++ if (!vmbus_connection.rescind_work_queue) { ++ ret = -ENOMEM; ++ goto cleanup; ++ } ++ vmbus_connection.ignore_any_offer_msg = false; ++ + vmbus_connection.handle_primary_chan_wq = + create_workqueue("hv_pri_chan"); + if (!vmbus_connection.handle_primary_chan_wq) { +@@ -357,6 +365,9 @@ void vmbus_disconnect(void) + if (vmbus_connection.handle_primary_chan_wq) + destroy_workqueue(vmbus_connection.handle_primary_chan_wq); + ++ if (vmbus_connection.rescind_work_queue) ++ destroy_workqueue(vmbus_connection.rescind_work_queue); ++ + if (vmbus_connection.work_queue) + destroy_workqueue(vmbus_connection.work_queue); + +diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h +--- a/drivers/hv/hyperv_vmbus.h ++++ b/drivers/hv/hyperv_vmbus.h +@@ -261,6 +261,13 @@ struct vmbus_connection { + struct workqueue_struct *work_queue; + struct workqueue_struct *handle_primary_chan_wq; + struct workqueue_struct *handle_sub_chan_wq; ++ struct workqueue_struct *rescind_work_queue; ++ ++ /* ++ * On suspension of the vmbus, the accumulated offer messages ++ * must be dropped. ++ */ ++ bool ignore_any_offer_msg; + + /* + * The number of sub-channels and hv_sock channels that should be +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -1160,7 +1160,9 @@ void vmbus_on_msg_dpc(unsigned long data) + * work queue: the RESCIND handler can not start to + * run before the OFFER handler finishes. + */ +- schedule_work(&ctx->work); ++ if (vmbus_connection.ignore_any_offer_msg) ++ break; ++ queue_work(vmbus_connection.rescind_work_queue, &ctx->work); + break; + + case CHANNELMSG_OFFERCHANNEL: +@@ -1186,6 +1188,8 @@ void vmbus_on_msg_dpc(unsigned long data) + * to the CPUs which will execute the offer & rescind + * works by the time these works will start execution. + */ ++ if (vmbus_connection.ignore_any_offer_msg) ++ break; + atomic_inc(&vmbus_connection.offer_in_progress); + fallthrough; + +@@ -2446,15 +2450,20 @@ acpi_walk_err: + #ifdef CONFIG_PM_SLEEP + static int vmbus_bus_suspend(struct device *dev) + { ++ struct hv_per_cpu_context *hv_cpu = per_cpu_ptr( ++ hv_context.cpu_context, VMBUS_CONNECT_CPU); + struct vmbus_channel *channel, *sc; + +- while (atomic_read(&vmbus_connection.offer_in_progress) != 0) { +- /* +- * We wait here until the completion of any channel +- * offers that are currently in progress.
+- */ +- usleep_range(1000, 2000); +- } ++ tasklet_disable(&hv_cpu->msg_dpc); ++ vmbus_connection.ignore_any_offer_msg = true; ++ /* The tasklet_enable() takes care of providing a memory barrier */ ++ tasklet_enable(&hv_cpu->msg_dpc); ++ ++ /* Drain all the workqueues as we are in suspend */ ++ drain_workqueue(vmbus_connection.rescind_work_queue); ++ drain_workqueue(vmbus_connection.work_queue); ++ drain_workqueue(vmbus_connection.handle_primary_chan_wq); ++ drain_workqueue(vmbus_connection.handle_sub_chan_wq); + + mutex_lock(&vmbus_connection.channel_mutex); + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { +@@ -2531,6 +2540,8 @@ static int vmbus_bus_resume(struct device *dev) + size_t msgsize; + int ret; + ++ vmbus_connection.ignore_any_offer_msg = false; ++ + /* + * We only use the 'vmbus_proto_version', which was in use before + * hibernation, to re-negotiate with the host. diff --git a/patches.suse/msft-hv-2634-Drivers-hv-Create-debugfs-file-with-hyper-v-balloon-.patch b/patches.suse/msft-hv-2634-Drivers-hv-Create-debugfs-file-with-hyper-v-balloon-.patch new file mode 100644 index 0000000..e11686d --- /dev/null +++ b/patches.suse/msft-hv-2634-Drivers-hv-Create-debugfs-file-with-hyper-v-balloon-.patch @@ -0,0 +1,227 @@ +From: Alexander Atanasov +Date: Mon, 11 Jul 2022 18:18:22 +0000 +Patch-mainline: v6.0-rc1 +Subject: Drivers: hv: Create debugfs file with hyper-v balloon usage information +Git-commit: d180e0a1be6cea2b7436fadbd1c96aecdf3c46c7 +References: git-fixes + +Allow the guest to know how much it is ballooned by the host. +It is useful when debugging out of memory conditions. + +When the host gets memory back from the guest, it is accounted +as used memory in the guest, but the guest has no way to know +how much it is actually ballooned. + +Expose current state, flags and max possible memory to the guest. +While at it - fix a 10+ year old typo. + +Signed-off-by: Alexander Atanasov +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20220711181825.52318-1-alexander.atanasov@virtuozzo.com +Signed-off-by: Wei Liu +Acked-by: Olaf Hering +--- + drivers/hv/hv_balloon.c | 135 +++++++++++++++++++++- + 1 file changed, 129 insertions(+), 6 deletions(-) + +diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c +--- a/drivers/hv/hv_balloon.c ++++ b/drivers/hv/hv_balloon.c +@@ -11,6 +11,7 @@ + #include <linux/kernel.h> + #include <linux/jiffies.h> + #include <linux/mman.h> ++#include <linux/debugfs.h> + #include <linux/delay.h> + #include <linux/init.h> + #include <linux/module.h> +@@ -248,7 +249,7 @@ struct dm_capabilities_resp_msg { + * num_committed: Committed memory in pages. + * page_file_size: The accumulated size of all page files + * in the system in pages. +- * zero_free: The nunber of zero and free pages. ++ * zero_free: The number of zero and free pages. + * page_file_writes: The writes to the page file in pages. + * io_diff: An indicator of file cache efficiency or page file activity, + * calculated as File Cache Page Fault Count - Page Read Count. +@@ -567,6 +568,11 @@ struct hv_dynmem_device { + __u32 version; + + struct page_reporting_dev_info pr_dev_info; ++ ++ /* ++ * Maximum number of pages that can be hot_add-ed ++ */ ++ __u64 max_dynamic_page_count; + }; + + static struct hv_dynmem_device dm_device; +@@ -1078,6 +1084,7 @@ static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg) + + pr_info("Max.
dynamic memory size: %llu MB\n", + (*max_page_count) >> (20 - HV_HYP_PAGE_SHIFT)); ++ dm->max_dynamic_page_count = *max_page_count; + } + + break; +@@ -1116,6 +1123,19 @@ static unsigned long compute_balloon_floor(void) + return min_pages; + } + ++/* ++ * Compute total committed memory pages ++ */ ++ ++static unsigned long get_pages_committed(struct hv_dynmem_device *dm) ++{ ++ return vm_memory_committed() + ++ dm->num_pages_ballooned + ++ (dm->num_pages_added > dm->num_pages_onlined ? ++ dm->num_pages_added - dm->num_pages_onlined : 0) + ++ compute_balloon_floor(); ++} ++ + /* + * Post our status as it relates memory pressure to the + * host. Host expects the guests to post this status +@@ -1157,11 +1177,7 @@ static void post_status(struct hv_dynmem_device *dm) + * asking us to balloon them out. + */ + num_pages_avail = si_mem_available(); +- num_pages_committed = vm_memory_committed() + +- dm->num_pages_ballooned + +- (dm->num_pages_added > dm->num_pages_onlined ? +- dm->num_pages_added - dm->num_pages_onlined : 0) + +- compute_balloon_floor(); ++ num_pages_committed = get_pages_committed(dm); + + trace_balloon_status(num_pages_avail, num_pages_committed, + vm_memory_committed(), dm->num_pages_ballooned, +@@ -1807,6 +1823,109 @@ out: + return ret; + } + ++/* ++ * DEBUGFS Interface ++ */ ++#ifdef CONFIG_DEBUG_FS ++ ++/** ++ * hv_balloon_debug_show - shows statistics of balloon operations. ++ * @f: pointer to the &struct seq_file. ++ * @offset: ignored. ++ * ++ * Provides the statistics that can be accessed in hv-balloon in the debugfs. ++ * ++ * Return: zero on success or an error code. ++ */ ++static int hv_balloon_debug_show(struct seq_file *f, void *offset) ++{ ++ struct hv_dynmem_device *dm = f->private; ++ char *sname; ++ ++ seq_printf(f, "%-22s: %u.%u\n", "host_version", ++ DYNMEM_MAJOR_VERSION(dm->version), ++ DYNMEM_MINOR_VERSION(dm->version)); ++ ++ seq_printf(f, "%-22s:", "capabilities"); ++ if (ballooning_enabled()) ++ seq_puts(f, " enabled"); ++ ++ if (hot_add_enabled()) ++ seq_puts(f, " hot_add"); ++ ++ seq_puts(f, "\n"); ++ ++ seq_printf(f, "%-22s: %u", "state", dm->state); ++ switch (dm->state) { ++ case DM_INITIALIZING: ++ sname = "Initializing"; ++ break; ++ case DM_INITIALIZED: ++ sname = "Initialized"; ++ break; ++ case DM_BALLOON_UP: ++ sname = "Balloon Up"; ++ break; ++ case DM_BALLOON_DOWN: ++ sname = "Balloon Down"; ++ break; ++ case DM_HOT_ADD: ++ sname = "Hot Add"; ++ break; ++ case DM_INIT_ERROR: ++ sname = "Error"; ++ break; ++ default: ++ sname = "Unknown"; ++ } ++ seq_printf(f, " (%s)\n", sname); ++ ++ /* HV Page Size */ ++ seq_printf(f, "%-22s: %ld\n", "page_size", HV_HYP_PAGE_SIZE); ++ ++ /* Pages added with hot_add */ ++ seq_printf(f, "%-22s: %u\n", "pages_added", dm->num_pages_added); ++ ++ /* pages that are "onlined"/used from pages_added */ ++ seq_printf(f, "%-22s: %u\n", "pages_onlined", dm->num_pages_onlined); ++ ++ /* pages we have given back to host */ ++ seq_printf(f, "%-22s: %u\n", "pages_ballooned", dm->num_pages_ballooned); ++ ++ seq_printf(f, "%-22s: %lu\n", "total_pages_committed", ++ get_pages_committed(dm)); ++ ++ seq_printf(f, "%-22s: %llu\n", "max_dynamic_page_count", ++ dm->max_dynamic_page_count); ++ ++ return 0; ++} ++ ++DEFINE_SHOW_ATTRIBUTE(hv_balloon_debug); ++ ++static void hv_balloon_debugfs_init(struct hv_dynmem_device *b) ++{ ++ debugfs_create_file("hv-balloon", 0444, NULL, b, ++ &hv_balloon_debug_fops); ++} ++ ++static void hv_balloon_debugfs_exit(struct hv_dynmem_device *b) ++{ ++ debugfs_remove(debugfs_lookup("hv-balloon", 
NULL)); ++} ++ ++#else ++ ++static inline void hv_balloon_debugfs_init(struct hv_dynmem_device *b) ++{ ++} ++ ++static inline void hv_balloon_debugfs_exit(struct hv_dynmem_device *b) ++{ ++} ++ ++#endif /* CONFIG_DEBUG_FS */ ++ + static int balloon_probe(struct hv_device *dev, + const struct hv_vmbus_device_id *dev_id) + { +@@ -1854,6 +1973,8 @@ static int balloon_probe(struct hv_device *dev, + goto probe_error; + } + ++ hv_balloon_debugfs_init(&dm_device); ++ + return 0; + + probe_error: +@@ -1879,6 +2000,8 @@ static int balloon_remove(struct hv_device *dev) + if (dm->num_pages_ballooned != 0) + pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned); + ++ hv_balloon_debugfs_exit(dm); ++ + cancel_work_sync(&dm->balloon_wrk.wrk); + cancel_work_sync(&dm->ha_wrk.wrk); + diff --git a/patches.suse/msft-hv-2640-drm-hyperv-Fix-an-error-handling-path-in-hyperv_vmbu.patch b/patches.suse/msft-hv-2640-drm-hyperv-Fix-an-error-handling-path-in-hyperv_vmbu.patch new file mode 100644 index 0000000..7020e0f --- /dev/null +++ b/patches.suse/msft-hv-2640-drm-hyperv-Fix-an-error-handling-path-in-hyperv_vmbu.patch @@ -0,0 +1,55 @@ +From: Christophe JAILLET +Date: Sun, 31 Jul 2022 22:01:55 +0200 +Patch-mainline: v6.0-rc6 +Subject: drm/hyperv: Fix an error handling path in hyperv_vmbus_probe() +Git-commit: f1f63cbb705dc38826369496c6fc12c1b8db1324 +References: git-fixes + +hyperv_setup_vram() calls vmbus_allocate_mmio(). This must be undone in +the error handling path of the probe, as already done in the remove +function. + +Fixes: a0ab5abced55 ("drm/hyperv : Removing the restruction of VRAM allocation with PCI bar size") +Signed-off-by: Christophe JAILLET +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/7dfa372af3e35fbb1d6f157183dfef2e4512d3be.1659297696.git.christophe.jaillet@wanadoo.fr +Signed-off-by: Wei Liu +Acked-by: Olaf Hering +--- + drivers/gpu/drm/hyperv/hyperv_drm_drv.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +--- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c ++++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +@@ -133,7 +133,6 @@ static int hyperv_vmbus_probe(struct hv_device *hdev, + } + + ret = hyperv_setup_vram(hv, hdev); +- + if (ret) + goto err_vmbus_close; + +@@ -150,18 +149,20 @@ static int hyperv_vmbus_probe(struct hv_device *hdev, + + ret = hyperv_mode_config_init(hv); + if (ret) +- goto err_vmbus_close; ++ goto err_free_mmio; + + ret = drm_dev_register(dev, 0); + if (ret) { + drm_err(dev, "Failed to register drm driver.\n"); +- goto err_vmbus_close; ++ goto err_free_mmio; + } + + drm_fbdev_generic_setup(dev, 0); + + return 0; + ++err_free_mmio: ++ vmbus_free_mmio(hv->mem->start, hv->fb_size); + err_vmbus_close: + vmbus_close(hdev->channel); + err_hv_set_drv_data: diff --git a/patches.suse/msft-hv-2654-drm-hyperv-Don-t-overwrite-dirt_needed-value-set-by-.patch b/patches.suse/msft-hv-2654-drm-hyperv-Don-t-overwrite-dirt_needed-value-set-by-.patch index cd4297f..909f825 100644 --- a/patches.suse/msft-hv-2654-drm-hyperv-Don-t-overwrite-dirt_needed-value-set-by-.patch +++ b/patches.suse/msft-hv-2654-drm-hyperv-Don-t-overwrite-dirt_needed-value-set-by-.patch @@ -33,4 +33,4 @@ diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hy - ret = hyperv_mode_config_init(hv); if (ret) - goto err_vmbus_close; + goto err_free_mmio; diff --git a/patches.suse/msft-hv-2667-hyperv-simplify-and-rename-generate_guest_id.patch 
b/patches.suse/msft-hv-2667-hyperv-simplify-and-rename-generate_guest_id.patch new file mode 100644 index 0000000..46df15c --- /dev/null +++ b/patches.suse/msft-hv-2667-hyperv-simplify-and-rename-generate_guest_id.patch @@ -0,0 +1,73 @@ +From: Li kunyu +Date: Wed, 28 Sep 2022 14:40:46 +0800 +Patch-mainline: v6.1-rc1 +Subject: hyperv: simplify and rename generate_guest_id +Git-commit: d5ebde1e2b46154d7e03efb1ae3039a304e5386d +References: bsc#1189965 + +The generate_guest_id function is more suitable for use after the +following modifications. + +1. The return value of the function is modified to u64. +2. Remove the d_info1 and d_info2 parameters from the function, keep the + u64 type kernel_version parameter. +3. Rename the function to make it clearly a Hyper-V related function, + and modify it to hv_generate_guest_id. + +Signed-off-by: Li kunyu +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20220928064046.3545-1-kunyu@nfschina.com +Signed-off-by: Wei Liu +Acked-by: Olaf Hering +--- + arch/arm64/hyperv/mshyperv.c | 2 +- + arch/x86/hyperv/hv_init.c | 2 +- + include/asm-generic/mshyperv.h | 9 +++------ + 3 files changed, 5 insertions(+), 8 deletions(-) + +diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c +--- a/arch/arm64/hyperv/mshyperv.c ++++ b/arch/arm64/hyperv/mshyperv.c +@@ -38,7 +38,7 @@ static int __init hyperv_init(void) + return 0; + + /* Setup the guest ID */ +- guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); ++ guest_id = hv_generate_guest_id(LINUX_VERSION_CODE); + hv_set_vpreg(HV_REGISTER_GUEST_OSID, guest_id); + + /* Get the features and hints from Hyper-V */ +diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c +--- a/arch/x86/hyperv/hv_init.c ++++ b/arch/x86/hyperv/hv_init.c +@@ -426,7 +426,7 @@ void __init hyperv_init(void) + * 1. Register the guest ID + * 2. Enable the hypercall and register the hypercall page + */ +- guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); ++ guest_id = hv_generate_guest_id(LINUX_VERSION_CODE); + wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); + + /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */ +diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h +--- a/include/asm-generic/mshyperv.h ++++ b/include/asm-generic/mshyperv.h +@@ -105,15 +105,12 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, + } + + /* Generate the guest OS identifier as described in the Hyper-V TLFS */ +-static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, +- __u64 d_info2) ++static inline u64 hv_generate_guest_id(u64 kernel_version) + { +- __u64 guest_id = 0; ++ u64 guest_id; + +- guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 48); +- guest_id |= (d_info1 << 48); ++ guest_id = (((u64)HV_LINUX_VENDOR_ID) << 48); + guest_id |= (kernel_version << 16); +- guest_id |= d_info2; + + return guest_id; + } diff --git a/patches.suse/mtd-add-add_disk-error-handling.patch b/patches.suse/mtd-add-add_disk-error-handling.patch new file mode 100644 index 0000000..c29129c --- /dev/null +++ b/patches.suse/mtd-add-add_disk-error-handling.patch @@ -0,0 +1,47 @@ +From: Luis Chamberlain +Date: Fri, 15 Oct 2021 16:30:28 -0700 +Subject: [PATCH] mtd: add add_disk() error handling +Git-commit: 83b863f4a3f0de4ece7802d9121fed0c3e64145f +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. 
+ +Acked-by: Miquel Raynal +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20211015233028.2167651-10-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/mtd/mtd_blkdevs.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c +index b8ae1ec14e17..4eaba6f4ec68 100644 +--- a/drivers/mtd/mtd_blkdevs.c ++++ b/drivers/mtd/mtd_blkdevs.c +@@ -384,7 +384,9 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) + if (new->readonly) + set_disk_ro(gd, 1); + +- device_add_disk(&new->mtd->dev, gd, NULL); ++ ret = device_add_disk(&new->mtd->dev, gd, NULL); ++ if (ret) ++ goto out_cleanup_disk; + + if (new->disk_attributes) { + ret = sysfs_create_group(&disk_to_dev(gd)->kobj, +@@ -393,6 +395,8 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) + } + return 0; + ++out_cleanup_disk: ++ blk_cleanup_disk(new->disk); + out_free_tag_set: + blk_mq_free_tag_set(new->tag_set); + out_kfree_tag_set: +-- +2.35.3 + diff --git a/patches.suse/mtip32xx-add-error-handling-support-for-add_disk.patch b/patches.suse/mtip32xx-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..2f4dffa --- /dev/null +++ b/patches.suse/mtip32xx-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,39 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:01:48 -0700 +Subject: [PATCH] mtip32xx: add error handling support for add_disk() +Git-commit: 4a32e1cdb745ea9f66358810b0ce85698033f57e +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. + +The read_capacity_error error label already does what we need, +so just re-use that. + +Signed-off-by: Luis Chamberlain +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/mtip32xx/mtip32xx.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c +index 901855717cb5..d0b40309f47e 100644 +--- a/drivers/block/mtip32xx/mtip32xx.c ++++ b/drivers/block/mtip32xx/mtip32xx.c +@@ -3633,7 +3633,9 @@ static int mtip_block_initialize(struct driver_data *dd) + set_capacity(dd->disk, capacity); + + /* Enable the block device and add it to /dev */ +- device_add_disk(&dd->pdev->dev, dd->disk, mtip_disk_attr_groups); ++ rv = device_add_disk(&dd->pdev->dev, dd->disk, mtip_disk_attr_groups); ++ if (rv) ++ goto read_capacity_error; + + if (dd->mtip_svc_handler) { + set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag); +-- +2.35.3 + diff --git a/patches.suse/n64cart-add-error-handling-support-for-add_disk.patch b/patches.suse/n64cart-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..3bd30f8 --- /dev/null +++ b/patches.suse/n64cart-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,62 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:01:02 -0700 +Subject: [PATCH] n64cart: add error handling support for add_disk() +Git-commit: d1df6021b70ce7823df89941c0c97e746fa2ad92 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. 
+ +Signed-off-by: Luis Chamberlain +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/n64cart.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c +index b168ca25b6c9..78282f01f581 100644 +--- a/drivers/block/n64cart.c ++++ b/drivers/block/n64cart.c +@@ -115,6 +115,7 @@ static const struct block_device_operations n64cart_fops = { + static int __init n64cart_probe(struct platform_device *pdev) + { + struct gendisk *disk; ++ int err = -ENOMEM; + + if (!start || !size) { + pr_err("start or size not specified\n"); +@@ -132,7 +133,7 @@ static int __init n64cart_probe(struct platform_device *pdev) + + disk = blk_alloc_disk(NUMA_NO_NODE); + if (!disk) +- return -ENOMEM; ++ goto out; + + disk->first_minor = 0; + disk->flags = GENHD_FL_NO_PART_SCAN; +@@ -147,11 +148,18 @@ static int __init n64cart_probe(struct platform_device *pdev) + blk_queue_physical_block_size(disk->queue, 4096); + blk_queue_logical_block_size(disk->queue, 4096); + +- add_disk(disk); ++ err = add_disk(disk); ++ if (err) ++ goto out_cleanup_disk; + + pr_info("n64cart: %u kb disk\n", size / 1024); + + return 0; ++ ++out_cleanup_disk: ++ blk_cleanup_disk(disk); ++out: ++ return err; + } + + static struct platform_driver n64cart_driver = { +-- +2.35.3 + diff --git a/patches.suse/net-Add-includes-masked-by-netdevice.h-including-uap.patch b/patches.suse/net-Add-includes-masked-by-netdevice.h-including-uap.patch new file mode 100644 index 0000000..4870334 --- /dev/null +++ b/patches.suse/net-Add-includes-masked-by-netdevice.h-including-uap.patch @@ -0,0 +1,97 @@ +From: Jakub Kicinski +Date: Wed, 29 Dec 2021 17:27:41 -0800 +Subject: net: Add includes masked by netdevice.h including uapi/bpf.h +Patch-mainline: v5.17-rc1 +Git-commit: 3b80b73a4b3de38f72cd79e1a157449917f2bcb5 +References: jsc#PED-1368 + +Add missing includes unmasked by the subsequent change. + +Mostly network drivers missing an include for XDP_PACKET_HEADROOM. + +Signed-off-by: Jakub Kicinski +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211230012742.770642-2-kuba@kernel.org +Acked-by: Shung-Hsi Yu +--- + drivers/net/ethernet/amazon/ena/ena_netdev.h | 1 + + drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 1 + + drivers/net/ethernet/microsoft/mana/mana_en.c | 2 ++ + drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 + + drivers/net/ethernet/ti/cpsw_priv.h | 2 ++ + include/net/ip6_fib.h | 1 + + kernel/bpf/net_namespace.c | 1 + + 7 files changed, 9 insertions(+) + +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + #include "ena_com.h" + #include "ena_eth_com.h" +--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c ++++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + #include "nic_reg.h" + #include "nic.h" +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -1,6 +1,8 @@ + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + /* Copyright (c) 2021, Microsoft Corporation. 
*/ + ++#include + + #include + #include + #include +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + + struct stmmac_resources { + void __iomem *addr; +--- a/drivers/net/ethernet/ti/cpsw_priv.h ++++ b/drivers/net/ethernet/ti/cpsw_priv.h +@@ -6,6 +6,8 @@ + #ifndef DRIVERS_NET_ETHERNET_TI_CPSW_PRIV_H_ + #define DRIVERS_NET_ETHERNET_TI_CPSW_PRIV_H_ + ++#include + + #include "davinci_cpdma.h" + + #define CPSW_DEBUG (NETIF_MSG_HW | NETIF_MSG_WOL | \ +--- a/include/net/ip6_fib.h ++++ b/include/net/ip6_fib.h +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_IPV6_MULTIPLE_TABLES + #define FIB6_TABLE_HASHSZ 256 +--- a/kernel/bpf/net_namespace.c ++++ b/kernel/bpf/net_namespace.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + + #include ++#include + #include + #include + diff --git a/patches.suse/net-bpf-Handle-return-value-of-BPF_CGROUP_RUN_PROG_I.patch b/patches.suse/net-bpf-Handle-return-value-of-BPF_CGROUP_RUN_PROG_I.patch new file mode 100644 index 0000000..d3dbe4a --- /dev/null +++ b/patches.suse/net-bpf-Handle-return-value-of-BPF_CGROUP_RUN_PROG_I.patch @@ -0,0 +1,146 @@ +From: Menglong Dong +Date: Thu, 6 Jan 2022 21:20:20 +0800 +Subject: net: bpf: Handle return value of + BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND() +Patch-mainline: v5.17-rc1 +Git-commit: 91a760b26926265a60c77ddf016529bcf3e17a04 +References: jsc#PED-1368 + +The return value of BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND() in +__inet_bind() is not handled properly. If the return value +is non-zero, it will set inet_saddr and inet_rcv_saddr to 0 and +exit: + + err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk); + if (err) { + inet->inet_saddr = inet->inet_rcv_saddr = 0; + goto out_release_sock; + } + +Let's take UDP for example and see what will happen. For a UDP +socket, it will be added to 'udp_prot.h.udp_table->hash' and +'udp_prot.h.udp_table->hash2' after sk->sk_prot->get_port() +succeeds. If 'inet->inet_rcv_saddr' is specified here, +then 'sk' will be in the 'hslot2' of 'hash2' that it doesn't belong +to (because inet_saddr is changed to 0), and received UDP packets +will not be passed to this sock. If 'inet->inet_rcv_saddr' is not +specified here, the sock will work fine, as it can receive packets +properly, which is weird, as the 'bind()' has already failed. + +To undo the get_port() operation, introduce the 'put_port' field +for 'struct proto'. For TCP proto, it is inet_put_port(); For UDP +proto, it is udp_lib_unhash(); For icmp proto, it is +ping_unhash(). + +Therefore, after a sys_bind() failure caused by +BPF_CGROUP_RUN_PROG_INET4_POST_BIND(), it will be unbound, which +means that it can try to be bound to another port.
+ +Signed-off-by: Menglong Dong +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220106132022.3470772-2-imagedong@tencent.com +Acked-by: Shung-Hsi Yu +--- + include/net/sock.h | 1 + + net/ipv4/af_inet.c | 2 ++ + net/ipv4/ping.c | 1 + + net/ipv4/tcp_ipv4.c | 1 + + net/ipv4/udp.c | 1 + + net/ipv6/af_inet6.c | 2 ++ + net/ipv6/ping.c | 1 + + net/ipv6/tcp_ipv6.c | 1 + + net/ipv6/udp.c | 1 + + 9 files changed, 11 insertions(+) + +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1198,6 +1198,7 @@ struct proto { + void (*unhash)(struct sock *sk); + void (*rehash)(struct sock *sk); + int (*get_port)(struct sock *sk, unsigned short snum); ++ void (*put_port)(struct sock *sk); + #ifdef CONFIG_BPF_SYSCALL + int (*psock_update_sk_prot)(struct sock *sk, + struct sk_psock *psock, +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -538,6 +538,8 @@ int __inet_bind(struct sock *sk, struct + err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk); + if (err) { + inet->inet_saddr = inet->inet_rcv_saddr = 0; ++ if (sk->sk_prot->put_port) ++ sk->sk_prot->put_port(sk); + goto out_release_sock; + } + } +--- a/net/ipv4/ping.c ++++ b/net/ipv4/ping.c +@@ -998,6 +998,7 @@ struct proto ping_prot = { + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, ++ .put_port = ping_unhash, + .obj_size = sizeof(struct inet_sock), + }; + EXPORT_SYMBOL(ping_prot); +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -3067,6 +3067,7 @@ struct proto tcp_prot = { + .hash = inet_hash, + .unhash = inet_unhash, + .get_port = inet_csk_get_port, ++ .put_port = inet_put_port, + #ifdef CONFIG_BPF_SYSCALL + .psock_update_sk_prot = tcp_bpf_update_proto, + #endif +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -2925,6 +2925,7 @@ struct proto udp_prot = { + .unhash = udp_lib_unhash, + .rehash = udp_v4_rehash, + .get_port = udp_v4_get_port, ++ .put_port = udp_lib_unhash, + #ifdef CONFIG_BPF_SYSCALL + .psock_update_sk_prot = udp_bpf_update_proto, + #endif +--- a/net/ipv6/af_inet6.c ++++ b/net/ipv6/af_inet6.c +@@ -415,6 +415,8 @@ static int __inet6_bind(struct sock *sk, + if (err) { + sk->sk_ipv6only = saved_ipv6only; + inet_reset_saddr(sk); ++ if (sk->sk_prot->put_port) ++ sk->sk_prot->put_port(sk); + goto out; + } + } +--- a/net/ipv6/ping.c ++++ b/net/ipv6/ping.c +@@ -177,6 +177,7 @@ struct proto pingv6_prot = { + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, ++ .put_port = ping_unhash, + .obj_size = sizeof(struct raw6_sock), + }; + EXPORT_SYMBOL_GPL(pingv6_prot); +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -2177,6 +2177,7 @@ struct proto tcpv6_prot = { + .hash = inet6_hash, + .unhash = inet_unhash, + .get_port = inet_csk_get_port, ++ .put_port = inet_put_port, + #ifdef CONFIG_BPF_SYSCALL + .psock_update_sk_prot = tcp_bpf_update_proto, + #endif +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -1732,6 +1732,7 @@ struct proto udpv6_prot = { + .unhash = udp_lib_unhash, + .rehash = udp_v6_rehash, + .get_port = udp_v6_get_port, ++ .put_port = udp_lib_unhash, + #ifdef CONFIG_BPF_SYSCALL + .psock_update_sk_prot = udp_bpf_update_proto, + #endif diff --git a/patches.suse/net-smc-Avoid-overwriting-the-copies-of-clcsock-callback-functions b/patches.suse/net-smc-Avoid-overwriting-the-copies-of-clcsock-callback-functions new file mode 100644 index 0000000..c754666 --- /dev/null +++ b/patches.suse/net-smc-Avoid-overwriting-the-copies-of-clcsock-callback-functions @@ -0,0 +1,64 @@ +From: Wen Gu +Date: Wed, 9 Feb 2022 22:10:53 +0800 +Subject: net/smc: Avoid overwriting 
the copies of clcsock callback functions +Git-commit: 1de9770d121ee9294794cca0e0be8fbfa0134ee8 +Patch-mainline: v5.17-rc5 +References: git-fixes + +The callback functions of clcsock will be saved and replaced during +the fallback. But if the fallback happens more than once, then the +copies of these callback functions will be overwritten incorrectly, +resulting in a loop call issue: + +clcsk->sk_error_report + |- smc_fback_error_report() <------------------------------| + |- smc_fback_forward_wakeup() | (loop) + |- clcsock_callback() (incorrectly overwritten) | + |- smc->clcsk_error_report() ------------------| + +So this patch fixes the issue by saving these function pointers only +once in the fallback and avoiding overwriting. + +Reported-by: syzbot+4de3c0e8a263e1e499bc@syzkaller.appspotmail.com +Fixes: 341adeec9ada ("net/smc: Forward wakeup to smc socket waitqueue after fallback") +Link: https://lore.kernel.org/r/0000000000006d045e05d78776f6@google.com +Signed-off-by: Wen Gu +Signed-off-by: David S. Miller +Acked-by: Petr Tesarik +--- + net/smc/af_smc.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -667,14 +667,17 @@ static void smc_fback_error_report(struc + static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) + { + struct sock *clcsk; ++ int rc = 0; + + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { +- mutex_unlock(&smc->clcsock_release_lock); +- return -EBADF; ++ rc = -EBADF; ++ goto out; + } + clcsk = smc->clcsock->sk; + ++ if (smc->use_fallback) ++ goto out; + smc->use_fallback = true; + smc->fallback_rsn = reason_code; + smc_stat_fallback(smc); +@@ -702,8 +705,9 @@ static int smc_switch_to_fallback(struct + smc->clcsock->sk->sk_user_data = + (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + } ++out: + mutex_unlock(&smc->clcsock_release_lock); +- return 0; ++ return rc; + } + + /* fall back during connect */ diff --git a/patches.suse/net-smc-Fix-an-error-code-in-smc_lgr_create b/patches.suse/net-smc-Fix-an-error-code-in-smc_lgr_create new file mode 100644 index 0000000..aa6cdca --- /dev/null +++ b/patches.suse/net-smc-Fix-an-error-code-in-smc_lgr_create @@ -0,0 +1,31 @@ +From: Dan Carpenter +Date: Fri, 14 Oct 2022 12:34:36 +0300 +Subject: net/smc: Fix an error code in smc_lgr_create() +Git-commit: bdee15e8c58b450ad736a2b62ef8c7a12548b704 +Patch-mainline: v6.1-rc2 +References: git-fixes + +If smc_wr_alloc_lgr_mem() fails then return an error code. Don't return +success. + +Fixes: 8799e310fb3f ("net/smc: add v2 support to the work request layer") +Signed-off-by: Dan Carpenter +Reviewed-by: Wenjia Zhang +Signed-off-by: David S. 
Miller +Acked-by: Petr Tesarik +--- + net/smc/smc_core.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/smc/smc_core.c ++++ b/net/smc/smc_core.c +@@ -887,7 +887,8 @@ static int smc_lgr_create(struct smc_soc + } + memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1], + SMC_MAX_PNETID_LEN); +- if (smc_wr_alloc_lgr_mem(lgr)) ++ rc = smc_wr_alloc_lgr_mem(lgr); ++ if (rc) + goto free_wq; + smc_llc_lgr_init(lgr, smc); + diff --git a/patches.suse/net-smc-Fix-possible-access-to-freed-memory-in-link-clear b/patches.suse/net-smc-Fix-possible-access-to-freed-memory-in-link-clear new file mode 100644 index 0000000..c5eb9d1 --- /dev/null +++ b/patches.suse/net-smc-Fix-possible-access-to-freed-memory-in-link-clear @@ -0,0 +1,132 @@ +From: Yacan Liu +Date: Tue, 6 Sep 2022 21:01:39 +0800 +Subject: net/smc: Fix possible access to freed memory in link clear +Git-commit: e9b1a4f867ae9c1dbd1d71cd09cbdb3239fb4968 +Patch-mainline: v6.0-rc5 +References: git-fixes + +After modifying the QP to the Error state, all RX WR would be completed +with WC in IB_WC_WR_FLUSH_ERR status. The current implementation does not +wait until this is done, but destroys the QP and frees the link group directly. +So there is a risk of accessing freed memory in tasklet context. + +Here is a crash example: + + BUG: unable to handle page fault for address: ffffffff8f220860 + #PF: supervisor write access in kernel mode + #PF: error_code(0x0002) - not-present page + PGD f7300e067 P4D f7300e067 PUD f7300f063 PMD 8c4e45063 PTE 800ffff08c9df060 + Oops: 0002 [#1] SMP PTI + CPU: 1 PID: 0 Comm: swapper/1 Kdump: loaded Tainted: G S OE 5.10.0-0607+ #23 + Hardware name: Inspur NF5280M4/YZMB-00689-101, BIOS 4.1.20 07/09/2018 + RIP: 0010:native_queued_spin_lock_slowpath+0x176/0x1b0 + Code: f3 90 48 8b 32 48 85 f6 74 f6 eb d5 c1 ee 12 83 e0 03 83 ee 01 48 c1 e0 05 48 63 f6 48 05 00 c8 02 00 48 03 04 f5 00 09 98 8e <48> 89 10 8b 42 08 85 c0 75 09 f3 90 8b 42 08 85 c0 74 f7 48 8b 32 + RSP: 0018:ffffb3b6c001ebd8 EFLAGS: 00010086 + RAX: ffffffff8f220860 RBX: 0000000000000246 RCX: 0000000000080000 + RDX: ffff91db1f86c800 RSI: 000000000000173c RDI: ffff91db62bace00 + RBP: ffff91db62bacc00 R08: 0000000000000000 R09: c00000010000028b + R10: 0000000000055198 R11: ffffb3b6c001ea58 R12: ffff91db80e05010 + R13: 000000000000000a R14: 0000000000000006 R15: 0000000000000040 + FS: 0000000000000000(0000) GS:ffff91db1f840000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: ffffffff8f220860 CR3: 00000001f9580004 CR4: 00000000003706e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + + _raw_spin_lock_irqsave+0x30/0x40 + mlx5_ib_poll_cq+0x4c/0xc50 [mlx5_ib] + smc_wr_rx_tasklet_fn+0x56/0xa0 [smc] + tasklet_action_common.isra.21+0x66/0x100 + __do_softirq+0xd5/0x29c + asm_call_irq_on_stack+0x12/0x20 + + do_softirq_own_stack+0x37/0x40 + irq_exit_rcu+0x9d/0xa0 + sysvec_call_function_single+0x34/0x80 + asm_sysvec_call_function_single+0x12/0x20 + +Fixes: bd4ad57718cc ("smc: initialize IB transport incl. PD, MR, QP, CQ, event, WR") +Signed-off-by: Yacan Liu +Reviewed-by: Tony Lu +Signed-off-by: David S.
Miller +Acked-by: Petr Tesarik +--- + net/smc/smc_core.c | 1 + + net/smc/smc_core.h | 2 ++ + net/smc/smc_wr.c | 5 +++++ + net/smc/smc_wr.h | 5 +++++ + 4 files changed, 13 insertions(+) + +--- a/net/smc/smc_core.c ++++ b/net/smc/smc_core.c +@@ -750,6 +750,7 @@ int smcr_link_init(struct smc_link_group + lnk->link_id = smcr_next_link_id(lgr); + lnk->lgr = lgr; + lnk->link_idx = link_idx; ++ lnk->wr_rx_id_compl = 0; + smc_ibdev_cnt_inc(lnk); + smcr_copy_dev_info_to_link(lnk); + atomic_set(&lnk->conn_cnt, 0); +--- a/net/smc/smc_core.h ++++ b/net/smc/smc_core.h +@@ -115,8 +115,10 @@ struct smc_link { + dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */ + dma_addr_t wr_rx_v2_dma_addr; /* DMA address of v2 rx buf*/ + u64 wr_rx_id; /* seq # of last recv WR */ ++ u64 wr_rx_id_compl; /* seq # of last completed WR */ + u32 wr_rx_cnt; /* number of WR recv buffers */ + unsigned long wr_rx_tstamp; /* jiffies when last buf rx */ ++ wait_queue_head_t wr_rx_empty_wait; /* wait for RQ empty */ + + struct ib_reg_wr wr_reg; /* WR register memory region */ + wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ +--- a/net/smc/smc_wr.c ++++ b/net/smc/smc_wr.c +@@ -458,6 +458,7 @@ static inline void smc_wr_rx_process_cqe + + for (i = 0; i < num; i++) { + link = wc[i].qp->qp_context; ++ link->wr_rx_id_compl = wc[i].wr_id; + if (wc[i].status == IB_WC_SUCCESS) { + link->wr_rx_tstamp = jiffies; + smc_wr_rx_demultiplex(&wc[i]); +@@ -469,6 +470,8 @@ static inline void smc_wr_rx_process_cqe + case IB_WC_RNR_RETRY_EXC_ERR: + case IB_WC_WR_FLUSH_ERR: + smcr_link_down_cond_sched(link); ++ if (link->wr_rx_id_compl == link->wr_rx_id) ++ wake_up(&link->wr_rx_empty_wait); + break; + default: + smc_wr_rx_post(link); /* refill WR RX */ +@@ -640,6 +643,7 @@ void smc_wr_free_link(struct smc_link *l + return; + ibdev = lnk->smcibdev->ibdev; + ++ smc_wr_drain_cq(lnk); + smc_wr_wakeup_reg_wait(lnk); + smc_wr_wakeup_tx_wait(lnk); + +@@ -893,6 +897,7 @@ int smc_wr_create_link(struct smc_link * + atomic_set(&lnk->wr_tx_refcnt, 0); + init_waitqueue_head(&lnk->wr_reg_wait); + atomic_set(&lnk->wr_reg_refcnt, 0); ++ init_waitqueue_head(&lnk->wr_rx_empty_wait); + return rc; + + dma_unmap: +--- a/net/smc/smc_wr.h ++++ b/net/smc/smc_wr.h +@@ -73,6 +73,11 @@ static inline void smc_wr_tx_link_put(st + wake_up_all(&link->wr_tx_wait); + } + ++static inline void smc_wr_drain_cq(struct smc_link *lnk) ++{ ++ wait_event(lnk->wr_rx_empty_wait, lnk->wr_rx_id_compl == lnk->wr_rx_id); ++} ++ + static inline void smc_wr_wakeup_tx_wait(struct smc_link *lnk) + { + wake_up_all(&lnk->wr_tx_wait); diff --git a/patches.suse/net-smc-Fix-possible-leaked-pernet-namespace-in-smc_init b/patches.suse/net-smc-Fix-possible-leaked-pernet-namespace-in-smc_init new file mode 100644 index 0000000..59c1822 --- /dev/null +++ b/patches.suse/net-smc-Fix-possible-leaked-pernet-namespace-in-smc_init @@ -0,0 +1,65 @@ +From: Chen Zhongjin +Date: Tue, 1 Nov 2022 17:37:22 +0800 +Subject: net/smc: Fix possible leaked pernet namespace in smc_init() +Git-commit: 62ff373da2534534c55debe6c724c7fe14adb97f +Patch-mainline: v6.1-rc4 +References: git-fixes + +In smc_init(), register_pernet_subsys(&smc_net_stat_ops) is called +without any error handling. +If it fails, registering of &smc_net_ops won't be reverted. +And if smc_nl_init() fails, &smc_net_stat_ops itself won't be reverted. 
+ +This leaves wild ops in the subsystem linked list and when another module +tries to call register_pernet_operations() it triggers a page fault: + +BUG: unable to handle page fault for address: fffffbfff81b964c +RIP: 0010:register_pernet_operations+0x1b9/0x5f0 +Call Trace: + + register_pernet_subsys+0x29/0x40 + ebtables_init+0x58/0x1000 [ebtables] + ... + +Fixes: 194730a9beb5 ("net/smc: Make SMC statistics network namespace aware") +Signed-off-by: Chen Zhongjin +Reviewed-by: Tony Lu +Reviewed-by: Wenjia Zhang +Link: https://lore.kernel.org/r/20221101093722.127223-1-chenzhongjin@huawei.com +Signed-off-by: Jakub Kicinski +Acked-by: Petr Tesarik +--- + net/smc/af_smc.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c +index 3ccbf3c201cd..e12d4fa5aece 100644 +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -3380,14 +3380,14 @@ static int __init smc_init(void) + + rc = register_pernet_subsys(&smc_net_stat_ops); + if (rc) +- return rc; ++ goto out_pernet_subsys; + + smc_ism_init(); + smc_clc_init(); + + rc = smc_nl_init(); + if (rc) +- goto out_pernet_subsys; ++ goto out_pernet_subsys_stat; + + rc = smc_pnet_init(); + if (rc) +@@ -3480,6 +3480,8 @@ static int __init smc_init(void) + smc_pnet_exit(); + out_nl: + smc_nl_exit(); ++out_pernet_subsys_stat: ++ unregister_pernet_subsys(&smc_net_stat_ops); + out_pernet_subsys: + unregister_pernet_subsys(&smc_net_ops); + + diff --git a/patches.suse/net-smc-Fix-slab-out-of-bounds-issue-in-fallback b/patches.suse/net-smc-Fix-slab-out-of-bounds-issue-in-fallback new file mode 100644 index 0000000..548c6d0 --- /dev/null +++ b/patches.suse/net-smc-Fix-slab-out-of-bounds-issue-in-fallback @@ -0,0 +1,205 @@ +From: Wen Gu +Date: Fri, 22 Apr 2022 15:56:19 +0800 +Subject: net/smc: Fix slab-out-of-bounds issue in fallback +Git-commit: 0558226cebee256aa3f8ec0cc5a800a10bf120a6 +Patch-mainline: v5.18-rc5 +References: git-fixes + +syzbot reported a slab-out-of-bounds/use-after-free issue, +which was caused by accessing an already freed smc sock in +fallback-specific callback functions of clcsock. + +This patch fixes the issue by restoring the fallback-specific +callback functions to the original ones and resetting clcsock +sk_user_data to NULL before freeing the smc sock. + +Meanwhile, this patch introduces sk_callback_lock to make +the access and assignment to sk_user_data mutually exclusive.
+ +Reported-by: syzbot+b425899ed22c6943e00b@syzkaller.appspotmail.com +Fixes: 341adeec9ada ("net/smc: Forward wakeup to smc socket waitqueue after fallback") +Link: https://lore.kernel.org/r/00000000000013ca8105d7ae3ada@google.com/ +Signed-off-by: Wen Gu +Acked-by: Karsten Graul +Signed-off-by: Jakub Kicinski +Acked-by: Petr Tesarik +--- + net/smc/af_smc.c | 80 +++++++++++++++++++++++++++++++++++++--------------- + net/smc/smc_close.c | 2 + + 2 files changed, 59 insertions(+), 23 deletions(-) + +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -131,11 +131,27 @@ struct proto smc_proto6 = { + }; + EXPORT_SYMBOL_GPL(smc_proto6); + ++static void smc_fback_restore_callbacks(struct smc_sock *smc) ++{ ++ struct sock *clcsk = smc->clcsock->sk; ++ ++ write_lock_bh(&clcsk->sk_callback_lock); ++ clcsk->sk_user_data = NULL; ++ ++ smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change); ++ smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready); ++ smc_clcsock_restore_cb(&clcsk->sk_write_space, &smc->clcsk_write_space); ++ smc_clcsock_restore_cb(&clcsk->sk_error_report, &smc->clcsk_error_report); ++ ++ write_unlock_bh(&clcsk->sk_callback_lock); ++} ++ + static void smc_restore_fallback_changes(struct smc_sock *smc) + { + if (smc->clcsock->file) { /* non-accepted sockets have no file yet */ + smc->clcsock->file->private_data = smc->sk.sk_socket; + smc->clcsock->file = NULL; ++ smc_fback_restore_callbacks(smc); + } + } + +@@ -633,48 +649,57 @@ out: + + static void smc_fback_state_change(struct sock *clcsk) + { +- struct smc_sock *smc = +- smc_clcsock_user_data(clcsk); ++ struct smc_sock *smc; + +- if (!smc) +- return; +- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change); ++ read_lock_bh(&clcsk->sk_callback_lock); ++ smc = smc_clcsock_user_data(clcsk); ++ if (smc) ++ smc_fback_forward_wakeup(smc, clcsk, ++ smc->clcsk_state_change); ++ read_unlock_bh(&clcsk->sk_callback_lock); + } + + static void smc_fback_data_ready(struct sock *clcsk) + { +- struct smc_sock *smc = +- smc_clcsock_user_data(clcsk); ++ struct smc_sock *smc; + +- if (!smc) +- return; +- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready); ++ read_lock_bh(&clcsk->sk_callback_lock); ++ smc = smc_clcsock_user_data(clcsk); ++ if (smc) ++ smc_fback_forward_wakeup(smc, clcsk, ++ smc->clcsk_data_ready); ++ read_unlock_bh(&clcsk->sk_callback_lock); + } + + static void smc_fback_write_space(struct sock *clcsk) + { +- struct smc_sock *smc = +- smc_clcsock_user_data(clcsk); ++ struct smc_sock *smc; + +- if (!smc) +- return; +- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space); ++ read_lock_bh(&clcsk->sk_callback_lock); ++ smc = smc_clcsock_user_data(clcsk); ++ if (smc) ++ smc_fback_forward_wakeup(smc, clcsk, ++ smc->clcsk_write_space); ++ read_unlock_bh(&clcsk->sk_callback_lock); + } + + static void smc_fback_error_report(struct sock *clcsk) + { +- struct smc_sock *smc = +- smc_clcsock_user_data(clcsk); ++ struct smc_sock *smc; + +- if (!smc) +- return; +- smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report); ++ read_lock_bh(&clcsk->sk_callback_lock); ++ smc = smc_clcsock_user_data(clcsk); ++ if (smc) ++ smc_fback_forward_wakeup(smc, clcsk, ++ smc->clcsk_error_report); ++ read_unlock_bh(&clcsk->sk_callback_lock); + } + + static void smc_fback_replace_callbacks(struct smc_sock *smc) + { + struct sock *clcsk = smc->clcsock->sk; + ++ write_lock_bh(&clcsk->sk_callback_lock); + clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + + 
smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change, +@@ -685,6 +710,8 @@ static void smc_fback_replace_callbacks( + &smc->clcsk_write_space); + smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report, + &smc->clcsk_error_report); ++ ++ write_unlock_bh(&clcsk->sk_callback_lock); + } + + static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) +@@ -2242,17 +2269,20 @@ out: + + static void smc_clcsock_data_ready(struct sock *listen_clcsock) + { +- struct smc_sock *lsmc = +- smc_clcsock_user_data(listen_clcsock); ++ struct smc_sock *lsmc; + ++ read_lock_bh(&listen_clcsock->sk_callback_lock); ++ lsmc = smc_clcsock_user_data(listen_clcsock); + if (!lsmc) +- return; ++ goto out; + lsmc->clcsk_data_ready(listen_clcsock); + if (lsmc->sk.sk_state == SMC_LISTEN) { + sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */ + if (!queue_work(smc_hs_wq, &lsmc->tcp_listen_work)) + sock_put(&lsmc->sk); + } ++out: ++ read_unlock_bh(&listen_clcsock->sk_callback_lock); + } + + static int smc_listen(struct socket *sock, int backlog) +@@ -2284,15 +2314,19 @@ static int smc_listen(struct socket *soc + /* save original sk_data_ready function and establish + * smc-specific sk_data_ready function + */ ++ write_lock_bh(&smc->clcsock->sk->sk_callback_lock); + smc->clcsock->sk->sk_user_data = + (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready, + smc_clcsock_data_ready, &smc->clcsk_data_ready); ++ write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); + rc = kernel_listen(smc->clcsock, backlog); + if (rc) { ++ write_lock_bh(&smc->clcsock->sk->sk_callback_lock); + smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, + &smc->clcsk_data_ready); + smc->clcsock->sk->sk_user_data = NULL; ++ write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); + goto out; + } + sk->sk_max_ack_backlog = backlog; +--- a/net/smc/smc_close.c ++++ b/net/smc/smc_close.c +@@ -211,9 +211,11 @@ again: + sk->sk_state = SMC_CLOSED; + sk->sk_state_change(sk); /* wake up accept */ + if (smc->clcsock && smc->clcsock->sk) { ++ write_lock_bh(&smc->clcsock->sk->sk_callback_lock); + smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, + &smc->clcsk_data_ready); + smc->clcsock->sk->sk_user_data = NULL; ++ write_unlock_bh(&smc->clcsock->sk->sk_callback_lock); + rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); + } + smc_close_cleanup_listen(sk); diff --git a/patches.suse/net-smc-Fix-sock-leak-when-release-after-smc_shutdown b/patches.suse/net-smc-Fix-sock-leak-when-release-after-smc_shutdown new file mode 100644 index 0000000..f603cf8 --- /dev/null +++ b/patches.suse/net-smc-Fix-sock-leak-when-release-after-smc_shutdown @@ -0,0 +1,38 @@ +From: Tony Lu +Date: Thu, 14 Apr 2022 15:51:03 +0800 +Subject: net/smc: Fix sock leak when release after smc_shutdown() +Git-commit: 1a74e99323746353bba11562a2f2d0aa8102f402 +Patch-mainline: v5.18-rc4 +References: git-fixes + +Since commit e5d5aadcf3cd ("net/smc: fix sk_refcnt underflow on linkdown +and fallback"), for a fallback connection, __smc_release() does not call +sock_put() if its state is already SMC_CLOSED. + +When calling smc_shutdown() after falling back, its state is set to +SMC_CLOSED but does not call sock_put(), so this patch calls it. + +Reported-and-tested-by: syzbot+6e29a053eb165bd50de5@syzkaller.appspotmail.com +Fixes: e5d5aadcf3cd ("net/smc: fix sk_refcnt underflow on linkdown and fallback") +Signed-off-by: Tony Lu +Acked-by: Karsten Graul +Signed-off-by: David S. 
Miller +Acked-by: Petr Tesarik +--- + net/smc/af_smc.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -2530,8 +2530,10 @@ static int smc_shutdown(struct socket *s + if (smc->use_fallback) { + rc = kernel_sock_shutdown(smc->clcsock, how); + sk->sk_shutdown = smc->clcsock->sk->sk_shutdown; +- if (sk->sk_shutdown == SHUTDOWN_MASK) ++ if (sk->sk_shutdown == SHUTDOWN_MASK) { + sk->sk_state = SMC_CLOSED; ++ sock_put(sk); ++ } + goto out; + } + switch (how) { diff --git a/patches.suse/net-smc-Forward-wakeup-to-smc-socket-waitqueue-after-fallback b/patches.suse/net-smc-Forward-wakeup-to-smc-socket-waitqueue-after-fallback new file mode 100644 index 0000000..11fb6e4 --- /dev/null +++ b/patches.suse/net-smc-Forward-wakeup-to-smc-socket-waitqueue-after-fallback @@ -0,0 +1,289 @@ +From: Wen Gu +Date: Wed, 26 Jan 2022 23:33:04 +0800 +Subject: net/smc: Forward wakeup to smc socket waitqueue after fallback +Git-commit: 341adeec9adad0874f29a0a1af35638207352a39 +Patch-mainline: v5.17-rc3 +References: git-fixes + +When we replace TCP with SMC and a fallback occurs, there may be +some socket waitqueue entries remaining in smc socket->wq, such +as eppoll_entries inserted by userspace applications. + +After the fallback, data flows over TCP/IP and only clcsocket->wq +will be woken up. Applications can't be notified by the entries +which were inserted in smc socket->wq before fallback. So we need +a mechanism to wake up smc socket->wq at the same time if some +entries remain in it. + +The current workaround is to transfer the entries from smc socket->wq +to clcsock->wq during the fallback. But this may cause a crash +like this: + + general protection fault, probably for non-canonical address 0xdead000000000100: 0000 [#1] PREEMPT SMP PTI + CPU: 3 PID: 0 Comm: swapper/3 Kdump: loaded Tainted: G E 5.16.0+ #107 + RIP: 0010:__wake_up_common+0x65/0x170 + Call Trace: + + __wake_up_common_lock+0x7a/0xc0 + sock_def_readable+0x3c/0x70 + tcp_data_queue+0x4a7/0xc40 + tcp_rcv_established+0x32f/0x660 + ? sk_filter_trim_cap+0xcb/0x2e0 + tcp_v4_do_rcv+0x10b/0x260 + tcp_v4_rcv+0xd2a/0xde0 + ip_protocol_deliver_rcu+0x3b/0x1d0 + ip_local_deliver_finish+0x54/0x60 + ip_local_deliver+0x6a/0x110 + ? tcp_v4_early_demux+0xa2/0x140 + ? tcp_v4_early_demux+0x10d/0x140 + ip_sublist_rcv_finish+0x49/0x60 + ip_sublist_rcv+0x19d/0x230 + ip_list_rcv+0x13e/0x170 + __netif_receive_skb_list_core+0x1c2/0x240 + netif_receive_skb_list_internal+0x1e6/0x320 + napi_complete_done+0x11d/0x190 + mlx5e_napi_poll+0x163/0x6b0 [mlx5_core] + __napi_poll+0x3c/0x1b0 + net_rx_action+0x27c/0x300 + __do_softirq+0x114/0x2d2 + irq_exit_rcu+0xb4/0xe0 + common_interrupt+0xba/0xe0 + + + +The crash is caused by privately transferring waitqueue entries from +smc socket->wq to clcsock->wq. The owners of these entries, such as +epoll, have no idea that the entries have been transferred to a +different socket wait queue and still use the original waitqueue spinlock +(smc socket->wq.wait.lock) to make operations on the entries exclusive, +but it doesn't work. Operations on the entries, such as removing them +from the waitqueue (which is now clcsock->wq after the fallback), may cause +a crash when the clcsock waitqueue is being iterated over at the moment. + +This patch tries to fix this by no longer transferring wait queue +entries privately, but introducing its own implementations of clcsock's +callback functions for the fallback situation.
The callback functions will +forward the wakeup to smc socket->wq if clcsock->wq is actually woken +up and smc socket->wq has remaining entries. + +Fixes: 2153bd1e3d3d ("net/smc: Transfer remaining wait queue entries during fallback") +Suggested-by: Karsten Graul +Signed-off-by: Wen Gu +Acked-by: Karsten Graul +Signed-off-by: David S. Miller +Acked-by: Petr Tesarik +--- + net/smc/af_smc.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++------- + net/smc/smc.h | 20 +++++++- + 2 files changed, 137 insertions(+), 16 deletions(-) + +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -566,17 +566,115 @@ static void smc_stat_fallback(struct smc + mutex_unlock(&net->smc.mutex_fback_rsn); + } + ++/* must be called under rcu read lock */ ++static void smc_fback_wakeup_waitqueue(struct smc_sock *smc, void *key) ++{ ++ struct socket_wq *wq; ++ __poll_t flags; ++ ++ wq = rcu_dereference(smc->sk.sk_wq); ++ if (!skwq_has_sleeper(wq)) ++ return; ++ ++ /* wake up smc sk->sk_wq */ ++ if (!key) { ++ /* sk_state_change */ ++ wake_up_interruptible_all(&wq->wait); ++ } else { ++ flags = key_to_poll(key); ++ if (flags & (EPOLLIN | EPOLLOUT)) ++ /* sk_data_ready or sk_write_space */ ++ wake_up_interruptible_sync_poll(&wq->wait, flags); ++ else if (flags & EPOLLERR) ++ /* sk_error_report */ ++ wake_up_interruptible_poll(&wq->wait, flags); ++ } ++} ++ ++static int smc_fback_mark_woken(wait_queue_entry_t *wait, ++ unsigned int mode, int sync, void *key) ++{ ++ struct smc_mark_woken *mark = ++ container_of(wait, struct smc_mark_woken, wait_entry); ++ ++ mark->woken = true; ++ mark->key = key; ++ return 0; ++} ++ ++static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk, ++ void (*clcsock_callback)(struct sock *sk)) ++{ ++ struct smc_mark_woken mark = { .woken = false }; ++ struct socket_wq *wq; ++ ++ init_waitqueue_func_entry(&mark.wait_entry, ++ smc_fback_mark_woken); ++ rcu_read_lock(); ++ wq = rcu_dereference(clcsk->sk_wq); ++ if (!wq) ++ goto out; ++ add_wait_queue(sk_sleep(clcsk), &mark.wait_entry); ++ clcsock_callback(clcsk); ++ remove_wait_queue(sk_sleep(clcsk), &mark.wait_entry); ++ ++ if (mark.woken) ++ smc_fback_wakeup_waitqueue(smc, mark.key); ++out: ++ rcu_read_unlock(); ++} ++ ++static void smc_fback_state_change(struct sock *clcsk) ++{ ++ struct smc_sock *smc = ++ smc_clcsock_user_data(clcsk); ++ ++ if (!smc) ++ return; ++ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change); ++} ++ ++static void smc_fback_data_ready(struct sock *clcsk) ++{ ++ struct smc_sock *smc = ++ smc_clcsock_user_data(clcsk); ++ ++ if (!smc) ++ return; ++ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready); ++} ++ ++static void smc_fback_write_space(struct sock *clcsk) ++{ ++ struct smc_sock *smc = ++ smc_clcsock_user_data(clcsk); ++ ++ if (!smc) ++ return; ++ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space); ++} ++ ++static void smc_fback_error_report(struct sock *clcsk) ++{ ++ struct smc_sock *smc = ++ smc_clcsock_user_data(clcsk); ++ ++ if (!smc) ++ return; ++ smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report); ++} ++ + static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) + { +- wait_queue_head_t *smc_wait = sk_sleep(&smc->sk); +- wait_queue_head_t *clc_wait; +- unsigned long flags; ++ struct sock *clcsk; + + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } ++ clcsk = smc->clcsock->sk; ++ + smc->use_fallback = true; + smc->fallback_rsn = reason_code; + 
smc_stat_fallback(smc); +@@ -587,16 +685,22 @@ static int smc_switch_to_fallback(struct + smc->clcsock->wq.fasync_list = + smc->sk.sk_socket->wq.fasync_list; + +- /* There may be some entries remaining in +- * smc socket->wq, which should be removed +- * to clcsocket->wq during the fallback. ++ /* There might be some wait entries remaining ++ * in smc sk->sk_wq and they should be woken up ++ * as clcsock's wait queue is woken up. + */ +- clc_wait = sk_sleep(smc->clcsock->sk); +- spin_lock_irqsave(&smc_wait->lock, flags); +- spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING); +- list_splice_init(&smc_wait->head, &clc_wait->head); +- spin_unlock(&clc_wait->lock); +- spin_unlock_irqrestore(&smc_wait->lock, flags); ++ smc->clcsk_state_change = clcsk->sk_state_change; ++ smc->clcsk_data_ready = clcsk->sk_data_ready; ++ smc->clcsk_write_space = clcsk->sk_write_space; ++ smc->clcsk_error_report = clcsk->sk_error_report; ++ ++ clcsk->sk_state_change = smc_fback_state_change; ++ clcsk->sk_data_ready = smc_fback_data_ready; ++ clcsk->sk_write_space = smc_fback_write_space; ++ clcsk->sk_error_report = smc_fback_error_report; ++ ++ smc->clcsock->sk->sk_user_data = ++ (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); + } + mutex_unlock(&smc->clcsock_release_lock); + return 0; +@@ -2111,10 +2215,9 @@ out: + + static void smc_clcsock_data_ready(struct sock *listen_clcsock) + { +- struct smc_sock *lsmc; ++ struct smc_sock *lsmc = ++ smc_clcsock_user_data(listen_clcsock); + +- lsmc = (struct smc_sock *) +- ((uintptr_t)listen_clcsock->sk_user_data & ~SK_USER_DATA_NOCOPY); + if (!lsmc) + return; + lsmc->clcsk_data_ready(listen_clcsock); +--- a/net/smc/smc.h ++++ b/net/smc/smc.h +@@ -139,6 +139,12 @@ enum smc_urg_state { + SMC_URG_READ = 3, /* data was already read */ + }; + ++struct smc_mark_woken { ++ bool woken; ++ void *key; ++ wait_queue_entry_t wait_entry; ++}; ++ + struct smc_connection { + struct rb_node alert_node; + struct smc_link_group *lgr; /* link group of connection */ +@@ -227,8 +233,14 @@ struct smc_connection { + struct smc_sock { /* smc sock container */ + struct sock sk; + struct socket *clcsock; /* internal tcp socket */ ++ void (*clcsk_state_change)(struct sock *sk); ++ /* original stat_change fct. */ + void (*clcsk_data_ready)(struct sock *sk); +- /* original data_ready fct. **/ ++ /* original data_ready fct. */ ++ void (*clcsk_write_space)(struct sock *sk); ++ /* original write_space fct. */ ++ void (*clcsk_error_report)(struct sock *sk); ++ /* original error_report fct. 
*/ + struct smc_connection conn; /* smc connection */ + struct smc_sock *listen_smc; /* listen parent */ + struct work_struct connect_work; /* handle non-blocking connect*/ +@@ -263,6 +275,12 @@ static inline struct smc_sock *smc_sk(co + return (struct smc_sock *)sk; + } + ++static inline struct smc_sock *smc_clcsock_user_data(struct sock *clcsk) ++{ ++ return (struct smc_sock *) ++ ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY); ++} ++ + extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ + extern struct workqueue_struct *smc_close_wq; /* wq for close work */ + diff --git a/patches.suse/net-smc-Only-save-the-original-clcsock-callback-functions b/patches.suse/net-smc-Only-save-the-original-clcsock-callback-functions new file mode 100644 index 0000000..64774df --- /dev/null +++ b/patches.suse/net-smc-Only-save-the-original-clcsock-callback-functions @@ -0,0 +1,187 @@ +From: Wen Gu +Date: Fri, 22 Apr 2022 15:56:18 +0800 +Subject: net/smc: Only save the original clcsock callback functions +Git-commit: 97b9af7a70936e331170c79040cc9bf20071b566 +Patch-mainline: v5.18-rc5 +References: git-fixes + +Both listen and fallback process will save the current clcsock +callback functions and establish new ones. But if both of them +happen, the saved callback functions will be overwritten. + +So this patch introduces some helpers to ensure that only save +the original callback functions of clcsock. + +Fixes: 341adeec9ada ("net/smc: Forward wakeup to smc socket waitqueue after fallback") +Signed-off-by: Wen Gu +Acked-by: Karsten Graul +Signed-off-by: Jakub Kicinski +Acked-by: Petr Tesarik +--- + net/smc/af_smc.c | 55 ++++++++++++++++++++++++++++++++++------------------ + net/smc/smc.h | 29 +++++++++++++++++++++++++++ + net/smc/smc_close.c | 3 +- + 3 files changed, 67 insertions(+), 20 deletions(-) + +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -261,6 +261,7 @@ static struct sock *smc_sock_alloc(struc + sk->sk_prot->hash(sk); + sk_refcnt_debug_inc(sk); + mutex_init(&smc->clcsock_release_lock); ++ smc_init_saved_callbacks(smc); + + return sk; + } +@@ -670,9 +671,24 @@ static void smc_fback_error_report(struc + smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report); + } + ++static void smc_fback_replace_callbacks(struct smc_sock *smc) ++{ ++ struct sock *clcsk = smc->clcsock->sk; ++ ++ clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); ++ ++ smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change, ++ &smc->clcsk_state_change); ++ smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready, ++ &smc->clcsk_data_ready); ++ smc_clcsock_replace_cb(&clcsk->sk_write_space, smc_fback_write_space, ++ &smc->clcsk_write_space); ++ smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report, ++ &smc->clcsk_error_report); ++} ++ + static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) + { +- struct sock *clcsk; + int rc = 0; + + mutex_lock(&smc->clcsock_release_lock); +@@ -680,10 +696,7 @@ static int smc_switch_to_fallback(struct + rc = -EBADF; + goto out; + } +- clcsk = smc->clcsock->sk; + +- if (smc->use_fallback) +- goto out; + smc->use_fallback = true; + smc->fallback_rsn = reason_code; + smc_stat_fallback(smc); +@@ -698,18 +711,7 @@ static int smc_switch_to_fallback(struct + * in smc sk->sk_wq and they should be woken up + * as clcsock's wait queue is woken up. 
+ */ +- smc->clcsk_state_change = clcsk->sk_state_change; +- smc->clcsk_data_ready = clcsk->sk_data_ready; +- smc->clcsk_write_space = clcsk->sk_write_space; +- smc->clcsk_error_report = clcsk->sk_error_report; +- +- clcsk->sk_state_change = smc_fback_state_change; +- clcsk->sk_data_ready = smc_fback_data_ready; +- clcsk->sk_write_space = smc_fback_write_space; +- clcsk->sk_error_report = smc_fback_error_report; +- +- smc->clcsock->sk->sk_user_data = +- (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); ++ smc_fback_replace_callbacks(smc); + } + out: + mutex_unlock(&smc->clcsock_release_lock); +@@ -1474,6 +1476,19 @@ static int smc_clcsock_accept(struct smc + * function; switch it back to the original sk_data_ready function + */ + new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready; ++ ++ /* if new clcsock has also inherited the fallback-specific callback ++ * functions, switch them back to the original ones. ++ */ ++ if (lsmc->use_fallback) { ++ if (lsmc->clcsk_state_change) ++ new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change; ++ if (lsmc->clcsk_write_space) ++ new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space; ++ if (lsmc->clcsk_error_report) ++ new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report; ++ } ++ + (*new_smc)->clcsock = new_clcsock; + out: + return rc; +@@ -2269,13 +2284,15 @@ static int smc_listen(struct socket *soc + /* save original sk_data_ready function and establish + * smc-specific sk_data_ready function + */ +- smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready; +- smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready; + smc->clcsock->sk->sk_user_data = + (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); ++ smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready, ++ smc_clcsock_data_ready, &smc->clcsk_data_ready); + rc = kernel_listen(smc->clcsock, backlog); + if (rc) { +- smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; ++ smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, ++ &smc->clcsk_data_ready); ++ smc->clcsock->sk->sk_user_data = NULL; + goto out; + } + sk->sk_max_ack_backlog = backlog; +--- a/net/smc/smc.h ++++ b/net/smc/smc.h +@@ -275,12 +275,41 @@ static inline struct smc_sock *smc_sk(co + return (struct smc_sock *)sk; + } + ++static inline void smc_init_saved_callbacks(struct smc_sock *smc) ++{ ++ smc->clcsk_state_change = NULL; ++ smc->clcsk_data_ready = NULL; ++ smc->clcsk_write_space = NULL; ++ smc->clcsk_error_report = NULL; ++} ++ + static inline struct smc_sock *smc_clcsock_user_data(struct sock *clcsk) + { + return (struct smc_sock *) + ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY); + } + ++/* save target_cb in saved_cb, and replace target_cb with new_cb */ ++static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *), ++ void (*new_cb)(struct sock *), ++ void (**saved_cb)(struct sock *)) ++{ ++ /* only save once */ ++ if (!*saved_cb) ++ *saved_cb = *target_cb; ++ *target_cb = new_cb; ++} ++ ++/* restore target_cb to saved_cb, and reset saved_cb to NULL */ ++static inline void smc_clcsock_restore_cb(void (**target_cb)(struct sock *), ++ void (**saved_cb)(struct sock *)) ++{ ++ if (!*saved_cb) ++ return; ++ *target_cb = *saved_cb; ++ *saved_cb = NULL; ++} ++ + extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ + extern struct workqueue_struct *smc_close_wq; /* wq for close work */ + +--- a/net/smc/smc_close.c ++++ b/net/smc/smc_close.c +@@ -211,7 +211,8 @@ again: + sk->sk_state = SMC_CLOSED; + sk->sk_state_change(sk); /* wake up accept */ + if (smc->clcsock && 
smc->clcsock->sk) { +- smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; ++ smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready, ++ &smc->clcsk_data_ready); + smc->clcsock->sk->sk_user_data = NULL; + rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); + } diff --git a/patches.suse/net-smc-Send-directly-when-TCP_CORK-is-cleared b/patches.suse/net-smc-Send-directly-when-TCP_CORK-is-cleared new file mode 100644 index 0000000..ede4913 --- /dev/null +++ b/patches.suse/net-smc-Send-directly-when-TCP_CORK-is-cleared @@ -0,0 +1,100 @@ +From: Tony Lu +Date: Mon, 31 Jan 2022 02:02:55 +0800 +Subject: net/smc: Send directly when TCP_CORK is cleared +Git-commit: ea785a1a573b390a150010b3c5b81e1ccd8c98a8 +Patch-mainline: v5.18-rc1 +References: git-fixes + +According to the man page of TCP_CORK [1], if set, don't send out +partial frames. All queued partial frames are sent when option is +cleared again. + +When applications call setsockopt to disable TCP_CORK, this call is +protected by lock_sock(), and tries to mod_delayed_work() to 0, in order +to send pending data right now. However, the delayed work smc_tx_work is +also protected by lock_sock(). There introduces lock contention for +sending data. + +To fix it, send pending data directly which acts like TCP, without +lock_sock() protected in the context of setsockopt (already lock_sock()ed), +and cancel unnecessary dealyed work, which is protected by lock. + +[1] https://linux.die.net/man/7/tcp + +Signed-off-by: Tony Lu +Signed-off-by: David S. Miller +Acked-by: Petr Tesarik +--- + net/smc/af_smc.c | 4 ++-- + net/smc/smc_tx.c | 25 +++++++++++++++---------- + net/smc/smc_tx.h | 1 + + 3 files changed, 18 insertions(+), 12 deletions(-) + +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -2632,8 +2632,8 @@ static int smc_setsockopt(struct socket + sk->sk_state != SMC_CLOSED) { + if (!val) { + SMC_STAT_INC(smc, cork_cnt); +- mod_delayed_work(smc->conn.lgr->tx_wq, +- &smc->conn.tx_work, 0); ++ smc_tx_pending(&smc->conn); ++ cancel_delayed_work(&smc->conn.tx_work); + } + } + break; +--- a/net/smc/smc_tx.c ++++ b/net/smc/smc_tx.c +@@ -597,27 +597,32 @@ int smc_tx_sndbuf_nonempty(struct smc_co + return rc; + } + +-/* Wakeup sndbuf consumers from process context +- * since there is more data to transmit +- */ +-void smc_tx_work(struct work_struct *work) ++void smc_tx_pending(struct smc_connection *conn) + { +- struct smc_connection *conn = container_of(to_delayed_work(work), +- struct smc_connection, +- tx_work); + struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + int rc; + +- lock_sock(&smc->sk); + if (smc->sk.sk_err) +- goto out; ++ return; + + rc = smc_tx_sndbuf_nonempty(conn); + if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked && + !atomic_read(&conn->bytes_to_rcv)) + conn->local_rx_ctrl.prod_flags.write_blocked = 0; ++} ++ ++/* Wakeup sndbuf consumers from process context ++ * since there is more data to transmit ++ */ ++void smc_tx_work(struct work_struct *work) ++{ ++ struct smc_connection *conn = container_of(to_delayed_work(work), ++ struct smc_connection, ++ tx_work); ++ struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + +-out: ++ lock_sock(&smc->sk); ++ smc_tx_pending(conn); + release_sock(&smc->sk); + } + +--- a/net/smc/smc_tx.h ++++ b/net/smc/smc_tx.h +@@ -27,6 +27,7 @@ static inline int smc_tx_prepared_sends( + return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep); + } + ++void smc_tx_pending(struct smc_connection *conn); + void smc_tx_work(struct work_struct *work); + void 
smc_tx_init(struct smc_sock *smc); + int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len); diff --git a/patches.suse/net-smc-send-directly-on-setting-TCP_NODELAY b/patches.suse/net-smc-send-directly-on-setting-TCP_NODELAY new file mode 100644 index 0000000..9cb2bf2 --- /dev/null +++ b/patches.suse/net-smc-send-directly-on-setting-TCP_NODELAY @@ -0,0 +1,40 @@ +From: Dust Li +Date: Tue, 1 Mar 2022 17:43:59 +0800 +Subject: net/smc: send directly on setting TCP_NODELAY +Git-commit: b70a5cc045197aad9c159042621baf3c015f6cc7 +Patch-mainline: v5.18-rc1 +References: git-fixes + +In commit ea785a1a573b ("net/smc: Send directly when +TCP_CORK is cleared"), we no longer use delayed work +to implement cork. + +This patch uses the same algorithm: it removes the +delayed work when setting TCP_NODELAY and sends +directly in setsockopt(). This also makes +TCP_NODELAY behave the same as it does in TCP. + +Cc: Tony Lu +Signed-off-by: Dust Li +Signed-off-by: David S. Miller +Acked-by: Petr Tesarik +--- + net/smc/af_smc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c +index 19b3066cf7af..e661b3747945 100644 +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -2796,8 +2796,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, + sk->sk_state != SMC_CLOSED) { + if (val) { + SMC_STAT_INC(smc, ndly_cnt); +- mod_delayed_work(smc->conn.lgr->tx_wq, +- &smc->conn.tx_work, 0); ++ smc_tx_pending(&smc->conn); ++ cancel_delayed_work(&smc->conn.tx_work); + } + } + break; + diff --git a/patches.suse/net-smsc95xx-add-support-for-Microchip-EVB-LAN8670-U.patch b/patches.suse/net-smsc95xx-add-support-for-Microchip-EVB-LAN8670-U.patch new file mode 100644 index 0000000..f033d0c --- /dev/null +++ b/patches.suse/net-smsc95xx-add-support-for-Microchip-EVB-LAN8670-U.patch @@ -0,0 +1,38 @@ +From 4066bf4ce3ae3e322fa0c3c6418e45d99ff086b8 Mon Sep 17 00:00:00 2001 +From: Parthiban Veerasooran +Date: Mon, 13 Jun 2022 14:42:07 +0530 +Subject: [PATCH] net: smsc95xx: add support for Microchip EVB-LAN8670-USB +Git-commit: 4066bf4ce3ae3e322fa0c3c6418e45d99ff086b8 +References: git-fixes +Patch-mainline: v6.0-rc1 + +This patch adds support for Microchip's EVB-LAN8670-USB 10BASE-T1S +Ethernet device to the existing smsc95xx driver by adding the new +USB VID/PID pairs. + +Signed-off-by: Parthiban Veerasooran +Signed-off-by: David S.
Miller +Signed-off-by: Oliver Neukum +--- + drivers/net/usb/smsc95xx.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c +index bd03e16f98a1..35110814ba22 100644 +--- a/drivers/net/usb/smsc95xx.c ++++ b/drivers/net/usb/smsc95xx.c +@@ -2088,6 +2088,11 @@ static const struct usb_device_id products[] = { + USB_DEVICE(0x0424, 0x9E08), + .driver_info = (unsigned long) &smsc95xx_info, + }, ++ { ++ /* Microchip's EVB-LAN8670-USB 10BASE-T1S Ethernet Device */ ++ USB_DEVICE(0x184F, 0x0051), ++ .driver_info = (unsigned long)&smsc95xx_info, ++ }, + { }, /* END */ + }; + MODULE_DEVICE_TABLE(usb, products); +-- +2.35.3 + diff --git a/patches.suse/nfs-blocklayout-use-bdev_nr_bytes-instead-of-open-co.patch b/patches.suse/nfs-blocklayout-use-bdev_nr_bytes-instead-of-open-co.patch new file mode 100644 index 0000000..ac5a8e4 --- /dev/null +++ b/patches.suse/nfs-blocklayout-use-bdev_nr_bytes-instead-of-open-co.patch @@ -0,0 +1,43 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:18 +0200 +Subject: [PATCH] nfs/blocklayout: use bdev_nr_bytes instead of open coding it +Git-commit: 6e50e781fe8873610f2e4011848f8b8d7406ee96 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Link: https://lore.kernel.org/r/20211018101130.1838532-19-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/nfs/blocklayout/dev.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c +index acb1d22907da..5e56da748b2a 100644 +--- a/fs/nfs/blocklayout/dev.c ++++ b/fs/nfs/blocklayout/dev.c +@@ -252,7 +252,7 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d, + d->bdev = bdev; + + +- d->len = i_size_read(d->bdev->bd_inode); ++ d->len = bdev_nr_bytes(d->bdev); + d->map = bl_map_simple; + + printk(KERN_INFO "pNFS: using block device %s\n", +@@ -367,7 +367,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, + return PTR_ERR(bdev); + d->bdev = bdev; + +- d->len = i_size_read(d->bdev->bd_inode); ++ d->len = bdev_nr_bytes(d->bdev); + d->map = bl_map_simple; + d->pr_key = v->scsi.pr_key; + +-- +2.35.3 + diff --git a/patches.suse/nfsd-blocklayout-use-get_unique_id-instead-of-sendin.patch b/patches.suse/nfsd-blocklayout-use-get_unique_id-instead-of-sendin.patch new file mode 100644 index 0000000..91de61e --- /dev/null +++ b/patches.suse/nfsd-blocklayout-use-get_unique_id-instead-of-sendin.patch @@ -0,0 +1,270 @@ +From: Christoph Hellwig +Date: Thu, 21 Oct 2021 08:06:03 +0200 +Subject: [PATCH] nfsd/blocklayout: use ->get_unique_id instead of sending SCSI + commands +Git-commit: 8c6aabd1c72bc241c55f5b71a86cea5ef28bceca +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Call the ->get_unique_id method to query the SCSI identifiers. This can +use the cached VPD page in the sd driver instead of sending a command +on every LAYOUTGET. It will also allow to support NVMe based volumes +if the draft for that ever takes off. + +Signed-off-by: Christoph Hellwig +Acked-by: J. 
Bruce Fields +Reviewed-by: Hannes Reinecke +Link: https://lore.kernel.org/r/20211021060607.264371-4-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/nfsd/Kconfig | 1 - + fs/nfsd/blocklayout.c | 158 +++++++++++------------------------------- + fs/nfsd/nfs4layouts.c | 5 +- + 3 files changed, 44 insertions(+), 120 deletions(-) + +diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig +index 6e9ea4ee0f73..3d1d17256a91 100644 +--- a/fs/nfsd/Kconfig ++++ b/fs/nfsd/Kconfig +@@ -109,7 +109,6 @@ config NFSD_SCSILAYOUT + depends on NFSD_V4 && BLOCK + select NFSD_PNFS + select EXPORTFS_BLOCK_OPS +- select SCSI_COMMON + help + This option enables support for the exporting pNFS SCSI layouts + in the kernel's NFS server. The pNFS SCSI layout enables NFS +diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c +index c99dee99a3c1..e5c0982a381d 100644 +--- a/fs/nfsd/blocklayout.c ++++ b/fs/nfsd/blocklayout.c +@@ -9,9 +9,6 @@ + #include + + #include +-#include +-#include +-#include + + #include "blocklayoutxdr.h" + #include "pnfs.h" +@@ -211,109 +208,6 @@ const struct nfsd4_layout_ops bl_layout_ops = { + #endif /* CONFIG_NFSD_BLOCKLAYOUT */ + + #ifdef CONFIG_NFSD_SCSILAYOUT +-static int nfsd4_scsi_identify_device(struct block_device *bdev, +- struct pnfs_block_volume *b) +-{ +- struct request_queue *q = bdev->bd_disk->queue; +- struct request *rq; +- struct scsi_request *req; +- /* +- * The allocation length (passed in bytes 3 and 4 of the INQUIRY +- * command descriptor block) specifies the number of bytes that have +- * been allocated for the data-in buffer. +- * 252 is the highest one-byte value that is a multiple of 4. +- * 65532 is the highest two-byte value that is a multiple of 4. +- */ +- size_t bufflen = 252, maxlen = 65532, len, id_len; +- u8 *buf, *d, type, assoc; +- int retries = 1, error; +- +- if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q))) +- return -EINVAL; +- +-again: +- buf = kzalloc(bufflen, GFP_KERNEL); +- if (!buf) +- return -ENOMEM; +- +- rq = blk_get_request(q, REQ_OP_DRV_IN, 0); +- if (IS_ERR(rq)) { +- error = -ENOMEM; +- goto out_free_buf; +- } +- req = scsi_req(rq); +- +- error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL); +- if (error) +- goto out_put_request; +- +- req->cmd[0] = INQUIRY; +- req->cmd[1] = 1; +- req->cmd[2] = 0x83; +- req->cmd[3] = bufflen >> 8; +- req->cmd[4] = bufflen & 0xff; +- req->cmd_len = COMMAND_SIZE(INQUIRY); +- +- blk_execute_rq(NULL, rq, 1); +- if (req->result) { +- pr_err("pNFS: INQUIRY 0x83 failed with: %x\n", +- req->result); +- error = -EIO; +- goto out_put_request; +- } +- +- len = (buf[2] << 8) + buf[3] + 4; +- if (len > bufflen) { +- if (len <= maxlen && retries--) { +- blk_put_request(rq); +- kfree(buf); +- bufflen = len; +- goto again; +- } +- pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n", +- len); +- goto out_put_request; +- } +- +- d = buf + 4; +- for (d = buf + 4; d < buf + len; d += id_len + 4) { +- id_len = d[3]; +- type = d[1] & 0xf; +- assoc = (d[1] >> 4) & 0x3; +- +- /* +- * We only care about a EUI-64 and NAA designator types +- * with LU association. +- */ +- if (assoc != 0x00) +- continue; +- if (type != 0x02 && type != 0x03) +- continue; +- if (id_len != 8 && id_len != 12 && id_len != 16) +- continue; +- +- b->scsi.code_set = PS_CODE_SET_BINARY; +- b->scsi.designator_type = type == 0x02 ? 
+- PS_DESIGNATOR_EUI64 : PS_DESIGNATOR_NAA; +- b->scsi.designator_len = id_len; +- memcpy(b->scsi.designator, d + 4, id_len); +- +- /* +- * If we found a 8 or 12 byte descriptor continue on to +- * see if a 16 byte one is available. If we find a +- * 16 byte descriptor we're done. +- */ +- if (id_len == 16) +- break; +- } +- +-out_put_request: +- blk_put_request(rq); +-out_free_buf: +- kfree(buf); +- return error; +-} +- + #define NFSD_MDS_PR_KEY 0x0100000000000000ULL + + /* +@@ -325,6 +219,31 @@ static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp) + return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id; + } + ++static const u8 designator_types[] = { ++ PS_DESIGNATOR_EUI64, ++ PS_DESIGNATOR_NAA, ++}; ++ ++static int ++nfsd4_block_get_unique_id(struct gendisk *disk, struct pnfs_block_volume *b) ++{ ++ int ret, i; ++ ++ for (i = 0; i < ARRAY_SIZE(designator_types); i++) { ++ u8 type = designator_types[i]; ++ ++ ret = disk->fops->get_unique_id(disk, b->scsi.designator, type); ++ if (ret > 0) { ++ b->scsi.code_set = PS_CODE_SET_BINARY; ++ b->scsi.designator_type = type; ++ b->scsi.designator_len = ret; ++ return 0; ++ } ++ } ++ ++ return -EINVAL; ++} ++ + static int + nfsd4_block_get_device_info_scsi(struct super_block *sb, + struct nfs4_client *clp, +@@ -333,7 +252,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb, + struct pnfs_block_deviceaddr *dev; + struct pnfs_block_volume *b; + const struct pr_ops *ops; +- int error; ++ int ret; + + dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + + sizeof(struct pnfs_block_volume), GFP_KERNEL); +@@ -347,33 +266,38 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb, + b->type = PNFS_BLOCK_VOLUME_SCSI; + b->scsi.pr_key = nfsd4_scsi_pr_key(clp); + +- error = nfsd4_scsi_identify_device(sb->s_bdev, b); +- if (error) +- return error; ++ ret = nfsd4_block_get_unique_id(sb->s_bdev->bd_disk, b); ++ if (ret < 0) ++ goto out_free_dev; + ++ ret = -EINVAL; + ops = sb->s_bdev->bd_disk->fops->pr_ops; + if (!ops) { + pr_err("pNFS: device %s does not support PRs.\n", + sb->s_id); +- return -EINVAL; ++ goto out_free_dev; + } + +- error = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true); +- if (error) { ++ ret = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true); ++ if (ret) { + pr_err("pNFS: failed to register key for device %s.\n", + sb->s_id); +- return -EINVAL; ++ goto out_free_dev; + } + +- error = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY, ++ ret = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY, + PR_EXCLUSIVE_ACCESS_REG_ONLY, 0); +- if (error) { ++ if (ret) { + pr_err("pNFS: failed to reserve device %s.\n", + sb->s_id); +- return -EINVAL; ++ goto out_free_dev; + } + + return 0; ++ ++out_free_dev: ++ kfree(dev); ++ return ret; + } + + static __be32 +diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c +index a97873f2d22b..6d1b5bb051c5 100644 +--- a/fs/nfsd/nfs4layouts.c ++++ b/fs/nfsd/nfs4layouts.c +@@ -145,8 +145,9 @@ void nfsd4_setup_layout_type(struct svc_export *exp) + #ifdef CONFIG_NFSD_SCSILAYOUT + if (sb->s_export_op->map_blocks && + sb->s_export_op->commit_blocks && +- sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops && +- blk_queue_scsi_passthrough(sb->s_bdev->bd_disk->queue)) ++ sb->s_bdev && ++ sb->s_bdev->bd_disk->fops->pr_ops && ++ sb->s_bdev->bd_disk->fops->get_unique_id) + exp->ex_layout_types |= 1 << LAYOUT_SCSI; + #endif + } +-- +2.35.3 + diff --git a/patches.suse/nilfs2-use-bdev_nr_bytes-instead-of-open-coding-it.patch b/patches.suse/nilfs2-use-bdev_nr_bytes-instead-of-open-coding-it.patch new 
file mode 100644 index 0000000..4c60a3a --- /dev/null +++ b/patches.suse/nilfs2-use-bdev_nr_bytes-instead-of-open-coding-it.patch @@ -0,0 +1,63 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:19 +0200 +Subject: [PATCH] nilfs2: use bdev_nr_bytes instead of open coding it +Git-commit: 4fcd69798d7f366e1155f3041caf3891ebea72c6 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Acked-by: Ryusuke Konishi +Link: https://lore.kernel.org/r/20211018101130.1838532-20-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/nilfs2/ioctl.c | 2 +- + fs/nilfs2/super.c | 2 +- + fs/nilfs2/the_nilfs.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c +index 640ac8fe891e..1d0583cfd970 100644 +--- a/fs/nilfs2/ioctl.c ++++ b/fs/nilfs2/ioctl.c +@@ -1107,7 +1107,7 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) + goto out; + + ret = -ERANGE; +- if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode)) ++ if (range[1] > bdev_nr_bytes(inode->i_sb->s_bdev)) + goto out; + + segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize; +diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c +index f6b2d280aab5..3134c0e42fd4 100644 +--- a/fs/nilfs2/super.c ++++ b/fs/nilfs2/super.c +@@ -403,7 +403,7 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) + int ret; + + ret = -ERANGE; +- devsize = i_size_read(sb->s_bdev->bd_inode); ++ devsize = bdev_nr_bytes(sb->s_bdev); + if (newsize > devsize) + goto out; + +diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c +index c8bfc01da5d7..1bfcb5d3ea48 100644 +--- a/fs/nilfs2/the_nilfs.c ++++ b/fs/nilfs2/the_nilfs.c +@@ -489,7 +489,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, + { + struct nilfs_super_block **sbp = nilfs->ns_sbp; + struct buffer_head **sbh = nilfs->ns_sbh; +- u64 sb2off = NILFS_SB2_OFFSET_BYTES(nilfs->ns_bdev->bd_inode->i_size); ++ u64 sb2off = NILFS_SB2_OFFSET_BYTES(bdev_nr_bytes(nilfs->ns_bdev)); + int valid[2], swp = 0; + + sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize, +-- +2.35.3 + diff --git a/patches.suse/nouveau-fix-migrate_to_ram-for-faulting-page.patch b/patches.suse/nouveau-fix-migrate_to_ram-for-faulting-page.patch new file mode 100644 index 0000000..068d857 --- /dev/null +++ b/patches.suse/nouveau-fix-migrate_to_ram-for-faulting-page.patch @@ -0,0 +1,44 @@ +From 97061d441110528dc02972818f2f1dad485107f9 Mon Sep 17 00:00:00 2001 +From: Alistair Popple +Date: Wed, 19 Oct 2022 23:29:34 +1100 +Subject: [PATCH] nouveau: fix migrate_to_ram() for faulting page +Git-commit: 97061d441110528dc02972818f2f1dad485107f9 +Patch-mainline: v6.1-rc2 +References: git-fixes + +Commit 16ce101db85d ("mm/memory.c: fix race when faulting a device private +page") changed the migrate_to_ram() callback to take a reference on the +device page to ensure it can't be freed while handling the fault. +Unfortunately the corresponding update to Nouveau to accommodate this +change was inadvertently dropped from that patch causing GPU to CPU +migration to fail so add it here. 
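The fix itself is a one-field addition. For context, a minimal sketch of how a driver's migrate_to_ram() handler is expected to populate struct migrate_vma after commit 16ce101db85d; the example_* names and the surrounding driver logic are invented for illustration, only the struct migrate_vma field names come from include/linux/migrate.h:

static void *example_owner; /* driver-private owner cookie, placeholder */

/* Hypothetical driver sketch, not part of the patch: the faulting
 * page must be handed to the core via migrate_vma.fault_page so the
 * reference taken by the fault path is accounted for correctly. */
static vm_fault_t example_migrate_to_ram(struct vm_fault *vmf)
{
	unsigned long src = 0, dst = 0;
	struct migrate_vma args = {
		.vma		= vmf->vma,
		.start		= vmf->address,
		.end		= vmf->address + PAGE_SIZE,
		.src		= &src,
		.dst		= &dst,
		.pgmap_owner	= example_owner,
		.fault_page	= vmf->page,	/* the line this patch adds */
		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
	};

	if (migrate_vma_setup(&args))
		return VM_FAULT_SIGBUS;
	/* copy the device-private page back to system RAM here */
	migrate_vma_pages(&args);
	migrate_vma_finalize(&args);
	return 0;
}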
+ +Link: https://lkml.kernel.org/r/20221019122934.866205-1-apopple@nvidia.com +Fixes: 16ce101db85d ("mm/memory.c: fix race when faulting a device private page") +Signed-off-by: Alistair Popple +Cc: John Hubbard +Cc: Ralph Campbell +Cc: Lyude Paul +Cc: Ben Skeggs +Signed-off-by: Andrew Morton +Acked-by: Takashi Iwai + +--- + drivers/gpu/drm/nouveau/nouveau_dmem.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c +index 5fe209107246..20fe53815b20 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c ++++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c +@@ -176,6 +176,7 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) + .src = &src, + .dst = &dst, + .pgmap_owner = drm->dev, ++ .fault_page = vmf->page, + .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE, + }; + +-- +2.35.3 + diff --git a/patches.suse/ntfs-use-sb_bdev_nr_blocks.patch b/patches.suse/ntfs-use-sb_bdev_nr_blocks.patch new file mode 100644 index 0000000..21dda83 --- /dev/null +++ b/patches.suse/ntfs-use-sb_bdev_nr_blocks.patch @@ -0,0 +1,54 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:28 +0200 +Subject: [PATCH] ntfs: use sb_bdev_nr_blocks +Git-commit: ab70041731a6c2f153120e47746fb303aa6f237a +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the sb_bdev_nr_blocks helper instead of open coding it and clean up +ntfs_fill_super a bit by moving an assignment a little earlier that has +no negative side effects. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Acked-by: Anton Altaparmakov +Link: https://lore.kernel.org/r/20211018101130.1838532-29-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/ntfs/super.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c +index 0d7e948cb29c..5ae8de09b271 100644 +--- a/fs/ntfs/super.c ++++ b/fs/ntfs/super.c +@@ -2772,13 +2772,12 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) + ntfs_debug("Set device block size to %i bytes (block size bits %i).", + blocksize, sb->s_blocksize_bits); + /* Determine the size of the device in units of block_size bytes. */ +- if (!i_size_read(sb->s_bdev->bd_inode)) { ++ vol->nr_blocks = sb_bdev_nr_blocks(sb); ++ if (!vol->nr_blocks) { + if (!silent) + ntfs_error(sb, "Unable to determine device size."); + goto err_out_now; + } +- vol->nr_blocks = i_size_read(sb->s_bdev->bd_inode) >> +- sb->s_blocksize_bits; + /* Read the boot sector and return unlocked buffer head to it. 
*/ + if (!(bh = read_ntfs_boot_sector(sb, silent))) { + if (!silent) +@@ -2816,8 +2815,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) + goto err_out_now; + } + BUG_ON(blocksize != sb->s_blocksize); +- vol->nr_blocks = i_size_read(sb->s_bdev->bd_inode) >> +- sb->s_blocksize_bits; ++ vol->nr_blocks = sb_bdev_nr_blocks(sb); + ntfs_debug("Changed device block size to %i bytes (block size " + "bits %i) to match volume sector size.", + blocksize, sb->s_blocksize_bits); +-- +2.35.3 + diff --git a/patches.suse/null_blk-Fix-handling-of-submit_queues-and-poll_queu.patch b/patches.suse/null_blk-Fix-handling-of-submit_queues-and-poll_queu.patch new file mode 100644 index 0000000..5a45d63 --- /dev/null +++ b/patches.suse/null_blk-Fix-handling-of-submit_queues-and-poll_queu.patch @@ -0,0 +1,223 @@ +From: Shin'ichiro Kawasaki +Date: Fri, 29 Oct 2021 19:39:26 +0900 +Subject: [PATCH] null_blk: Fix handling of submit_queues and poll_queues + attributes +Git-commit: 15dfc662ef31a20b59097d59b0792b06770255fa +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Commit 0a593fbbc245 ("null_blk: poll queue support") introduced the poll +queue feature to null_blk. After this change, null_blk device has both +submit queues and poll queues, and null_map_queues() callback maps the +both queues for corresponding hardware contexts. The commit also added +the device configuration attribute 'poll_queues' in same manner as the +existing attribute 'submit_queues'. These attributes allow to modify the +numbers of queues. However, when the new values are stored to these +attributes, the values are just handled only for the corresponding +queue. When number of submit_queue is updated, number of poll_queue is +not counted, or vice versa. This caused inconsistent number of queues +and queue mapping and resulted in null-ptr-dereference. This failure was +observed in blktests block/029 and block/030. + +To avoid the inconsistency, fix the attribute updates to care both +submit_queues and poll_queues. Introduce the helper function +nullb_update_nr_hw_queues() to handle stores to the both two attributes. +Add poll_queues field to the struct nullb_device to track the number in +same manner as submit_queues. Add two more fields prev_submit_queues and +prev_poll_queues to keep the previous values before change. In case the +block layer failed to update the nr_hw_queues, refer the previous values +in null_map_queues() to map queues in same manner as before change. + +Also add poll_queues value checks in nullb_update_nr_hw_queues() and +null_validate_conf(). They ensure the poll_queues value of each device +is within the range from 1 to module parameter value of poll_queues. 
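To see why both values have to be handled together, consider a device created with submit_queues=4 and poll_queues=1: the tag set then carries five hardware queues, so storing 2 to submit_queues must resize the set to 3, not 2. A kernel-style sketch of the invariant the fix enforces, with illustrative names rather than the driver's own:

/* Illustrative only: the tag set always carries submit + poll queues,
 * so a store to either attribute must validate and resize using both
 * counts together. */
static int example_update(struct blk_mq_tag_set *set,
			  unsigned int submit_queues,
			  unsigned int poll_queues)
{
	if (!submit_queues || !poll_queues)
		return -EINVAL;	/* need one queue of each kind */

	blk_mq_update_nr_hw_queues(set, submit_queues + poll_queues);

	/* the block layer may fall back silently; verify the result and
	 * let the caller revert to the previous counts on failure */
	return set->nr_hw_queues == submit_queues + poll_queues ?
		0 : -ENOMEM;
}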
+ +Fixes: 0a593fbbc245 ("null_blk: poll queue support") +Reported-by: Yi Zhang +Signed-off-by: Shin'ichiro Kawasaki +Link: https://lore.kernel.org/r/20211029103926.845635-1-shinichiro.kawasaki@wdc.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/null_blk/main.c | 102 +++++++++++++++++++++++++----- + drivers/block/null_blk/null_blk.h | 2 + + 2 files changed, 87 insertions(+), 17 deletions(-) + +diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c +index f4af95c2f9a9..323af5c9c802 100644 +--- a/drivers/block/null_blk/main.c ++++ b/drivers/block/null_blk/main.c +@@ -328,30 +328,69 @@ nullb_device_##NAME##_store(struct config_item *item, const char *page, \ + } \ + CONFIGFS_ATTR(nullb_device_, NAME); + +-static int nullb_apply_submit_queues(struct nullb_device *dev, +- unsigned int submit_queues) ++static int nullb_update_nr_hw_queues(struct nullb_device *dev, ++ unsigned int submit_queues, ++ unsigned int poll_queues) ++ + { +- struct nullb *nullb = dev->nullb; + struct blk_mq_tag_set *set; ++ int ret, nr_hw_queues; + +- if (!nullb) ++ if (!dev->nullb) + return 0; + ++ /* ++ * Make sure at least one queue exists for each of submit and poll. ++ */ ++ if (!submit_queues || !poll_queues) ++ return -EINVAL; ++ + /* + * Make sure that null_init_hctx() does not access nullb->queues[] past + * the end of that array. + */ +- if (submit_queues > nr_cpu_ids) ++ if (submit_queues > nr_cpu_ids || poll_queues > g_poll_queues) + return -EINVAL; +- set = nullb->tag_set; +- blk_mq_update_nr_hw_queues(set, submit_queues); +- return set->nr_hw_queues == submit_queues ? 0 : -ENOMEM; ++ ++ /* ++ * Keep previous and new queue numbers in nullb_device for reference in ++ * the call back function null_map_queues(). ++ */ ++ dev->prev_submit_queues = dev->submit_queues; ++ dev->prev_poll_queues = dev->poll_queues; ++ dev->submit_queues = submit_queues; ++ dev->poll_queues = poll_queues; ++ ++ set = dev->nullb->tag_set; ++ nr_hw_queues = submit_queues + poll_queues; ++ blk_mq_update_nr_hw_queues(set, nr_hw_queues); ++ ret = set->nr_hw_queues == nr_hw_queues ? 
0 : -ENOMEM; ++ ++ if (ret) { ++ /* on error, revert the queue numbers */ ++ dev->submit_queues = dev->prev_submit_queues; ++ dev->poll_queues = dev->prev_poll_queues; ++ } ++ ++ return ret; ++} ++ ++static int nullb_apply_submit_queues(struct nullb_device *dev, ++ unsigned int submit_queues) ++{ ++ return nullb_update_nr_hw_queues(dev, submit_queues, dev->poll_queues); ++} ++ ++static int nullb_apply_poll_queues(struct nullb_device *dev, ++ unsigned int poll_queues) ++{ ++ return nullb_update_nr_hw_queues(dev, dev->submit_queues, poll_queues); + } + + NULLB_DEVICE_ATTR(size, ulong, NULL); + NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL); + NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues); +-NULLB_DEVICE_ATTR(poll_queues, uint, nullb_apply_submit_queues); ++NULLB_DEVICE_ATTR(poll_queues, uint, nullb_apply_poll_queues); + NULLB_DEVICE_ATTR(home_node, uint, NULL); + NULLB_DEVICE_ATTR(queue_mode, uint, NULL); + NULLB_DEVICE_ATTR(blocksize, uint, NULL); +@@ -599,7 +638,9 @@ static struct nullb_device *null_alloc_dev(void) + dev->size = g_gb * 1024; + dev->completion_nsec = g_completion_nsec; + dev->submit_queues = g_submit_queues; ++ dev->prev_submit_queues = g_submit_queues; + dev->poll_queues = g_poll_queues; ++ dev->prev_poll_queues = g_poll_queues; + dev->home_node = g_home_node; + dev->queue_mode = g_queue_mode; + dev->blocksize = g_bs; +@@ -1465,25 +1506,45 @@ static int null_map_queues(struct blk_mq_tag_set *set) + { + struct nullb *nullb = set->driver_data; + int i, qoff; ++ unsigned int submit_queues = g_submit_queues; ++ unsigned int poll_queues = g_poll_queues; ++ ++ if (nullb) { ++ struct nullb_device *dev = nullb->dev; ++ ++ /* ++ * Refer nr_hw_queues of the tag set to check if the expected ++ * number of hardware queues are prepared. If block layer failed ++ * to prepare them, use previous numbers of submit queues and ++ * poll queues to map queues. 
++ */ ++ if (set->nr_hw_queues == ++ dev->submit_queues + dev->poll_queues) { ++ submit_queues = dev->submit_queues; ++ poll_queues = dev->poll_queues; ++ } else if (set->nr_hw_queues == ++ dev->prev_submit_queues + dev->prev_poll_queues) { ++ submit_queues = dev->prev_submit_queues; ++ poll_queues = dev->prev_poll_queues; ++ } else { ++ pr_warn("tag set has unexpected nr_hw_queues: %d\n", ++ set->nr_hw_queues); ++ return -EINVAL; ++ } ++ } + + for (i = 0, qoff = 0; i < set->nr_maps; i++) { + struct blk_mq_queue_map *map = &set->map[i]; + + switch (i) { + case HCTX_TYPE_DEFAULT: +- if (nullb) +- map->nr_queues = nullb->dev->submit_queues; +- else +- map->nr_queues = g_submit_queues; ++ map->nr_queues = submit_queues; + break; + case HCTX_TYPE_READ: + map->nr_queues = 0; + continue; + case HCTX_TYPE_POLL: +- if (nullb) +- map->nr_queues = nullb->dev->poll_queues; +- else +- map->nr_queues = g_poll_queues; ++ map->nr_queues = poll_queues; + break; + } + map->queue_offset = qoff; +@@ -1853,6 +1914,13 @@ static int null_validate_conf(struct nullb_device *dev) + dev->submit_queues = nr_cpu_ids; + else if (dev->submit_queues == 0) + dev->submit_queues = 1; ++ dev->prev_submit_queues = dev->submit_queues; ++ ++ if (dev->poll_queues > g_poll_queues) ++ dev->poll_queues = g_poll_queues; ++ else if (dev->poll_queues == 0) ++ dev->poll_queues = 1; ++ dev->prev_poll_queues = dev->poll_queues; + + dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ); + dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER); +diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h +index 90c3eefb3ca3..78eb56b0ca55 100644 +--- a/drivers/block/null_blk/null_blk.h ++++ b/drivers/block/null_blk/null_blk.h +@@ -86,7 +86,9 @@ struct nullb_device { + unsigned int zone_max_open; /* max number of open zones */ + unsigned int zone_max_active; /* max number of active zones */ + unsigned int submit_queues; /* number of submission queues */ ++ unsigned int prev_submit_queues; /* number of submission queues before change */ + unsigned int poll_queues; /* number of IOPOLL submission queues */ ++ unsigned int prev_poll_queues; /* number of IOPOLL submission queues before change */ + unsigned int home_node; /* home node for the device */ + unsigned int queue_mode; /* block interface */ + unsigned int blocksize; /* block size */ +-- +2.35.3 + diff --git a/patches.suse/null_blk-poll-queue-support.patch b/patches.suse/null_blk-poll-queue-support.patch new file mode 100644 index 0000000..e26fd67 --- /dev/null +++ b/patches.suse/null_blk-poll-queue-support.patch @@ -0,0 +1,264 @@ +From: Jens Axboe +Date: Sat, 17 Apr 2021 09:29:49 -0600 +Subject: [PATCH] null_blk: poll queue support +Git-commit: 0a593fbbc245a85940ed34caa3aa1e4cb060c54b +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +There's currently no way to experiment with polled IO with null_blk, +which seems like an oversight. This patch adds support for polled IO. +We keep a list of issued IOs on submit, and then process that list +when mq_ops->poll() is invoked. + +A new parameter is added, poll_queues. It defaults to 1 like the +submit queues, meaning we'll have 1 poll queue available. 
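The mechanism is the classic park-and-drain pattern: queue_rq() parks a polled request on a per-queue list, and the poll handler splices the whole list off under the lock, then completes the entries outside of it. A stripped-down sketch of that pattern with completion details elided (the full version is in the null_poll() hunk below; the example_* names are placeholders):

static int example_poll(struct example_queue *nq)
{
	struct request *req, *next;
	LIST_HEAD(list);
	int nr = 0;

	/* take the whole backlog in one short critical section */
	spin_lock(&nq->poll_lock);
	list_splice_init(&nq->poll_list, &list);
	spin_unlock(&nq->poll_lock);

	/* process and complete the requests without holding the lock */
	list_for_each_entry_safe(req, next, &list, queuelist) {
		list_del_init(&req->queuelist);
		/* execute the command and end the request here */
		nr++;
	}
	return nr;
}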
+ +Fixes-by: Bart Van Assche +Fixes-by: Pavel Begunkov +Reviewed-by: Chaitanya Kulkarni +Link: https://lore.kernel.org/r/baca710d-0f2a-16e2-60bd-b105b854e0ae@kernel.dk +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/null_blk/main.c | 108 ++++++++++++++++++++++++++++-- + drivers/block/null_blk/null_blk.h | 4 ++ + 2 files changed, 108 insertions(+), 4 deletions(-) + +diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c +index e5cbcf582233..f4af95c2f9a9 100644 +--- a/drivers/block/null_blk/main.c ++++ b/drivers/block/null_blk/main.c +@@ -92,6 +92,10 @@ static int g_submit_queues = 1; + module_param_named(submit_queues, g_submit_queues, int, 0444); + MODULE_PARM_DESC(submit_queues, "Number of submission queues"); + ++static int g_poll_queues = 1; ++module_param_named(poll_queues, g_poll_queues, int, 0444); ++MODULE_PARM_DESC(poll_queues, "Number of IOPOLL submission queues"); ++ + static int g_home_node = NUMA_NO_NODE; + module_param_named(home_node, g_home_node, int, 0444); + MODULE_PARM_DESC(home_node, "Home node for the device"); +@@ -347,6 +351,7 @@ static int nullb_apply_submit_queues(struct nullb_device *dev, + NULLB_DEVICE_ATTR(size, ulong, NULL); + NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL); + NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues); ++NULLB_DEVICE_ATTR(poll_queues, uint, nullb_apply_submit_queues); + NULLB_DEVICE_ATTR(home_node, uint, NULL); + NULLB_DEVICE_ATTR(queue_mode, uint, NULL); + NULLB_DEVICE_ATTR(blocksize, uint, NULL); +@@ -466,6 +471,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { + &nullb_device_attr_size, + &nullb_device_attr_completion_nsec, + &nullb_device_attr_submit_queues, ++ &nullb_device_attr_poll_queues, + &nullb_device_attr_home_node, + &nullb_device_attr_queue_mode, + &nullb_device_attr_blocksize, +@@ -593,6 +599,7 @@ static struct nullb_device *null_alloc_dev(void) + dev->size = g_gb * 1024; + dev->completion_nsec = g_completion_nsec; + dev->submit_queues = g_submit_queues; ++ dev->poll_queues = g_poll_queues; + dev->home_node = g_home_node; + dev->queue_mode = g_queue_mode; + dev->blocksize = g_bs; +@@ -1454,12 +1461,80 @@ static bool should_requeue_request(struct request *rq) + return false; + } + ++static int null_map_queues(struct blk_mq_tag_set *set) ++{ ++ struct nullb *nullb = set->driver_data; ++ int i, qoff; ++ ++ for (i = 0, qoff = 0; i < set->nr_maps; i++) { ++ struct blk_mq_queue_map *map = &set->map[i]; ++ ++ switch (i) { ++ case HCTX_TYPE_DEFAULT: ++ if (nullb) ++ map->nr_queues = nullb->dev->submit_queues; ++ else ++ map->nr_queues = g_submit_queues; ++ break; ++ case HCTX_TYPE_READ: ++ map->nr_queues = 0; ++ continue; ++ case HCTX_TYPE_POLL: ++ if (nullb) ++ map->nr_queues = nullb->dev->poll_queues; ++ else ++ map->nr_queues = g_poll_queues; ++ break; ++ } ++ map->queue_offset = qoff; ++ qoff += map->nr_queues; ++ blk_mq_map_queues(map); ++ } ++ ++ return 0; ++} ++ ++static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) ++{ ++ struct nullb_queue *nq = hctx->driver_data; ++ LIST_HEAD(list); ++ int nr = 0; ++ ++ spin_lock(&nq->poll_lock); ++ list_splice_init(&nq->poll_list, &list); ++ spin_unlock(&nq->poll_lock); ++ ++ while (!list_empty(&list)) { ++ struct nullb_cmd *cmd; ++ struct request *req; ++ ++ req = list_first_entry(&list, struct request, queuelist); ++ list_del_init(&req->queuelist); ++ cmd = blk_mq_rq_to_pdu(req); ++ cmd->error = null_process_cmd(cmd, req_op(req), blk_rq_pos(req), ++ blk_rq_sectors(req)); ++ 
end_cmd(cmd); ++ nr++; ++ } ++ ++ return nr; ++} ++ + static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res) + { ++ struct blk_mq_hw_ctx *hctx = rq->mq_hctx; + struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); + + pr_info("rq %p timed out\n", rq); + ++ if (hctx->type == HCTX_TYPE_POLL) { ++ struct nullb_queue *nq = hctx->driver_data; ++ ++ spin_lock(&nq->poll_lock); ++ list_del_init(&rq->queuelist); ++ spin_unlock(&nq->poll_lock); ++ } ++ + /* + * If the device is marked as blocking (i.e. memory backed or zoned + * device), the submission path may be blocked waiting for resources +@@ -1480,10 +1555,11 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, + struct nullb_queue *nq = hctx->driver_data; + sector_t nr_sectors = blk_rq_sectors(bd->rq); + sector_t sector = blk_rq_pos(bd->rq); ++ const bool is_poll = hctx->type == HCTX_TYPE_POLL; + + might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING); + +- if (nq->dev->irqmode == NULL_IRQ_TIMER) { ++ if (!is_poll && nq->dev->irqmode == NULL_IRQ_TIMER) { + hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cmd->timer.function = null_cmd_timer_expired; + } +@@ -1507,6 +1583,13 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, + return BLK_STS_OK; + } + } ++ ++ if (is_poll) { ++ spin_lock(&nq->poll_lock); ++ list_add_tail(&bd->rq->queuelist, &nq->poll_list); ++ spin_unlock(&nq->poll_lock); ++ return BLK_STS_OK; ++ } + if (cmd->fake_timeout) + return BLK_STS_OK; + +@@ -1542,6 +1625,8 @@ static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) + init_waitqueue_head(&nq->wait); + nq->queue_depth = nullb->queue_depth; + nq->dev = nullb->dev; ++ INIT_LIST_HEAD(&nq->poll_list); ++ spin_lock_init(&nq->poll_lock); + } + + static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, +@@ -1567,6 +1652,8 @@ static const struct blk_mq_ops null_mq_ops = { + .queue_rq = null_queue_rq, + .complete = null_complete_rq, + .timeout = null_timeout_rq, ++ .poll = null_poll, ++ .map_queues = null_map_queues, + .init_hctx = null_init_hctx, + .exit_hctx = null_exit_hctx, + }; +@@ -1663,13 +1750,17 @@ static int setup_commands(struct nullb_queue *nq) + + static int setup_queues(struct nullb *nullb) + { +- nullb->queues = kcalloc(nr_cpu_ids, sizeof(struct nullb_queue), ++ int nqueues = nr_cpu_ids; ++ ++ if (g_poll_queues) ++ nqueues += g_poll_queues; ++ ++ nullb->queues = kcalloc(nqueues, sizeof(struct nullb_queue), + GFP_KERNEL); + if (!nullb->queues) + return -ENOMEM; + + nullb->queue_depth = nullb->dev->hw_queue_depth; +- + return 0; + } + +@@ -1721,9 +1812,14 @@ static int null_gendisk_register(struct nullb *nullb) + + static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) + { ++ int poll_queues; ++ + set->ops = &null_mq_ops; + set->nr_hw_queues = nullb ? nullb->dev->submit_queues : + g_submit_queues; ++ poll_queues = nullb ? nullb->dev->poll_queues : g_poll_queues; ++ if (poll_queues) ++ set->nr_hw_queues += poll_queues; + set->queue_depth = nullb ? nullb->dev->hw_queue_depth : + g_hw_queue_depth; + set->numa_node = nullb ? 
nullb->dev->home_node : g_home_node; +@@ -1733,7 +1829,11 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) + set->flags |= BLK_MQ_F_NO_SCHED; + if (g_shared_tag_bitmap) + set->flags |= BLK_MQ_F_TAG_HCTX_SHARED; +- set->driver_data = NULL; ++ set->driver_data = nullb; ++ if (g_poll_queues) ++ set->nr_maps = 3; ++ else ++ set->nr_maps = 1; + + if ((nullb && nullb->dev->blocking) || g_blocking) + set->flags |= BLK_MQ_F_BLOCKING; +diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h +index 64bef125d1df..90c3eefb3ca3 100644 +--- a/drivers/block/null_blk/null_blk.h ++++ b/drivers/block/null_blk/null_blk.h +@@ -32,6 +32,9 @@ struct nullb_queue { + struct nullb_device *dev; + unsigned int requeue_selection; + ++ struct list_head poll_list; ++ spinlock_t poll_lock; ++ + struct nullb_cmd *cmds; + }; + +@@ -83,6 +86,7 @@ struct nullb_device { + unsigned int zone_max_open; /* max number of open zones */ + unsigned int zone_max_active; /* max number of active zones */ + unsigned int submit_queues; /* number of submission queues */ ++ unsigned int poll_queues; /* number of IOPOLL submission queues */ + unsigned int home_node; /* home node for the device */ + unsigned int queue_mode; /* block interface */ + unsigned int blocksize; /* block size */ +-- +2.35.3 + diff --git a/patches.suse/nvme-add-support-for-batched-completion-of-polled-IO.patch b/patches.suse/nvme-add-support-for-batched-completion-of-polled-IO.patch new file mode 100644 index 0000000..2fd44f2 --- /dev/null +++ b/patches.suse/nvme-add-support-for-batched-completion-of-polled-IO.patch @@ -0,0 +1,200 @@ +From: Jens Axboe +Date: Fri, 8 Oct 2021 05:59:37 -0600 +Subject: [PATCH] nvme: add support for batched completion of polled IO +Git-commit: c234a65392062504acf04afe0ae404cca61a8e1a +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Take advantage of struct io_comp_batch, if passed in to the nvme poll +handler. If it's set, rather than complete each request individually +inline, store them in the io_comp_batch list. We only do so for requests +that will complete successfully, anything else will be completed inline as +before. 
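In block-layer terms the contract is small: the poll path offers each completed request to blk_mq_add_to_batch(), which accepts it only when an io_comp_batch was passed in and the completion was successful; anything the batch refuses is completed inline, and the accumulated batch is ended once per poll invocation. A condensed sketch of that contract (example_* names are placeholders; the real driver code in the hunks below also performs DMA unmap and zoned-append handling):

static void example_complete_inline(struct request *req); /* placeholder */

/* Drain callback: per-request teardown, then one batched end. */
static void example_complete_batch(struct io_comp_batch *iob)
{
	struct request *req;

	rq_list_for_each(&iob->req_list, req)
		; /* per-request teardown (unmap, cleanup) goes here */
	blk_mq_end_request_batch(iob);
}

static void example_handle_completion(struct request *req, u16 status,
				      struct io_comp_batch *iob)
{
	/* batch only successful completions; all else stays inline */
	if (!blk_mq_add_to_batch(req, iob, status,
				 example_complete_batch))
		example_complete_inline(req);
}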
+ +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/nvme/host/core.c | 17 ++++++++++++++--- + drivers/nvme/host/nvme.h | 14 ++++++++++++++ + drivers/nvme/host/pci.c | 32 +++++++++++++++++++++++--------- + 3 files changed, 51 insertions(+), 12 deletions(-) + +diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c +index ae15cb714596..3109bdf137e4 100644 +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -346,15 +346,19 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) + return RETRY; + } + +-static inline void nvme_end_req(struct request *req) ++static inline void nvme_end_req_zoned(struct request *req) + { +- blk_status_t status = nvme_error_status(nvme_req(req)->status); +- + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + req_op(req) == REQ_OP_ZONE_APPEND) + req->__sector = nvme_lba_to_sect(req->q->queuedata, + le64_to_cpu(nvme_req(req)->result.u64)); ++} ++ ++static inline void nvme_end_req(struct request *req) ++{ ++ blk_status_t status = nvme_error_status(nvme_req(req)->status); + ++ nvme_end_req_zoned(req); + nvme_trace_bio_complete(req); + blk_mq_end_request(req, status); + } +@@ -381,6 +385,13 @@ void nvme_complete_rq(struct request *req) + } + EXPORT_SYMBOL_GPL(nvme_complete_rq); + ++void nvme_complete_batch_req(struct request *req) ++{ ++ nvme_cleanup_cmd(req); ++ nvme_end_req_zoned(req); ++} ++EXPORT_SYMBOL_GPL(nvme_complete_batch_req); ++ + /* + * Called to unwind from ->queue_rq on a failed command submission so that the + * multipathing code gets called to potentially failover to another path. +diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h +index ed79a6c7e804..ef2467b93adb 100644 +--- a/drivers/nvme/host/nvme.h ++++ b/drivers/nvme/host/nvme.h +@@ -638,6 +638,20 @@ static inline bool nvme_is_aen_req(u16 qid, __u16 command_id) + } + + void nvme_complete_rq(struct request *req); ++void nvme_complete_batch_req(struct request *req); ++ ++static __always_inline void nvme_complete_batch(struct io_comp_batch *iob, ++ void (*fn)(struct request *rq)) ++{ ++ struct request *req; ++ ++ rq_list_for_each(&iob->req_list, req) { ++ fn(req); ++ nvme_complete_batch_req(req); ++ } ++ blk_mq_end_request_batch(iob); ++} ++ + blk_status_t nvme_host_path_error(struct request *req); + bool nvme_cancel_request(struct request *req, void *data, bool reserved); + void nvme_cancel_tagset(struct nvme_ctrl *ctrl); +diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c +index bb0482dfab3c..83d3503d5b88 100644 +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -959,7 +959,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, + return ret; + } + +-static void nvme_pci_complete_rq(struct request *req) ++static __always_inline void nvme_pci_unmap_rq(struct request *req) + { + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct nvme_dev *dev = iod->nvmeq->dev; +@@ -969,9 +969,19 @@ static void nvme_pci_complete_rq(struct request *req) + rq_integrity_vec(req)->bv_len, rq_data_dir(req)); + if (blk_rq_nr_phys_segments(req)) + nvme_unmap_data(dev, req); ++} ++ ++static void nvme_pci_complete_rq(struct request *req) ++{ ++ nvme_pci_unmap_rq(req); + nvme_complete_rq(req); + } + ++static void nvme_pci_complete_batch(struct io_comp_batch *iob) ++{ ++ nvme_complete_batch(iob, nvme_pci_unmap_rq); ++} ++ + /* We read the CQE phase first to check if the rest of the entry is valid */ + static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq) + { +@@ -996,7 
+1006,8 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) + return nvmeq->dev->tagset.tags[nvmeq->qid - 1]; + } + +-static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) ++static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, ++ struct io_comp_batch *iob, u16 idx) + { + struct nvme_completion *cqe = &nvmeq->cqes[idx]; + __u16 command_id = READ_ONCE(cqe->command_id); +@@ -1023,7 +1034,9 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) + } + + trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); +- if (!nvme_try_complete_req(req, cqe->status, cqe->result)) ++ if (!nvme_try_complete_req(req, cqe->status, cqe->result) && ++ !blk_mq_add_to_batch(req, iob, nvme_req(req)->status, ++ nvme_pci_complete_batch)) + nvme_pci_complete_rq(req); + } + +@@ -1039,7 +1052,8 @@ static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) + } + } + +-static inline int nvme_process_cq(struct nvme_queue *nvmeq) ++static inline int nvme_poll_cq(struct nvme_queue *nvmeq, ++ struct io_comp_batch *iob) + { + int found = 0; + +@@ -1050,7 +1064,7 @@ static inline int nvme_process_cq(struct nvme_queue *nvmeq) + * the cqe requires a full read memory barrier + */ + dma_rmb(); +- nvme_handle_cqe(nvmeq, nvmeq->cq_head); ++ nvme_handle_cqe(nvmeq, iob, nvmeq->cq_head); + nvme_update_cq_head(nvmeq); + } + +@@ -1063,7 +1077,7 @@ static irqreturn_t nvme_irq(int irq, void *data) + { + struct nvme_queue *nvmeq = data; + +- if (nvme_process_cq(nvmeq)) ++ if (nvme_poll_cq(nvmeq, NULL)) + return IRQ_HANDLED; + return IRQ_NONE; + } +@@ -1088,7 +1102,7 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq) + WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags)); + + disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); +- nvme_process_cq(nvmeq); ++ nvme_poll_cq(nvmeq, NULL); + enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + } + +@@ -1101,7 +1115,7 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) + return 0; + + spin_lock(&nvmeq->cq_poll_lock); +- found = nvme_process_cq(nvmeq); ++ found = nvme_poll_cq(nvmeq, iob); + spin_unlock(&nvmeq->cq_poll_lock); + + return found; +@@ -1434,7 +1448,7 @@ static void nvme_reap_pending_cqes(struct nvme_dev *dev) + + for (i = dev->ctrl.queue_count - 1; i > 0; i--) { + spin_lock(&dev->queues[i].cq_poll_lock); +- nvme_process_cq(&dev->queues[i]); ++ nvme_poll_cq(&dev->queues[i], NULL); + spin_unlock(&dev->queues[i].cq_poll_lock); + } + } +-- +2.35.3 + diff --git a/patches.suse/nvme-don-t-memset-the-normal-read-write-command.patch b/patches.suse/nvme-don-t-memset-the-normal-read-write-command.patch new file mode 100644 index 0000000..7b3e833 --- /dev/null +++ b/patches.suse/nvme-don-t-memset-the-normal-read-write-command.patch @@ -0,0 +1,62 @@ +From: Jens Axboe +Date: Mon, 18 Oct 2021 06:47:18 -0600 +Subject: [PATCH] nvme: don't memset() the normal read/write command +Git-commit: a9a7e30fd918588bc312ba782426e3a1282df359 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +This memset in the fast path costs a lot of cycles on my setup. Here's a +top-of-profile of doing ~6.7M IOPS: + ++ 5.90% io_uring [nvme] [k] nvme_queue_rq ++ 5.32% io_uring [nvme_core] [k] nvme_setup_cmd ++ 5.17% io_uring [kernel.vmlinux] [k] io_submit_sqes ++ 4.97% io_uring [kernel.vmlinux] [k] blkdev_direct_IO + +and a perf diff with this patch: + + 0.92% +4.40% [nvme_core] [k] nvme_setup_cmd + +reducing it from 5.3% to only 0.9%. 
This takes it from the 2nd most +cycle consumer to something that's mostly irrelevant. + +Reviewed-by: Chaitanya Kulkarni +Reviewed-by: Keith Busch +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/nvme/host/core.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c +index 824790bed2f5..c415c3faf420 100644 +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -929,8 +929,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, + u16 control = 0; + u32 dsmgmt = 0; + +- memset(cmnd, 0, sizeof(*cmnd)); +- + if (req->cmd_flags & REQ_FUA) + control |= NVME_RW_FUA; + if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD)) +@@ -940,9 +938,15 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, + dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; + + cmnd->rw.opcode = op; ++ cmnd->rw.flags = 0; + cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id); ++ cmnd->rw.rsvd2 = 0; ++ cmnd->rw.metadata = 0; + cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); + cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); ++ cmnd->rw.reftag = 0; ++ cmnd->rw.apptag = 0; ++ cmnd->rw.appmask = 0; + + if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams) + nvme_assign_write_stream(ctrl, req, &control, &dsmgmt); +-- +2.35.3 + diff --git a/patches.suse/nvme-move-command-clear-into-the-various-setup-helpe.patch b/patches.suse/nvme-move-command-clear-into-the-various-setup-helpe.patch new file mode 100644 index 0000000..f1e0e0f --- /dev/null +++ b/patches.suse/nvme-move-command-clear-into-the-various-setup-helpe.patch @@ -0,0 +1,87 @@ +From: Jens Axboe +Date: Mon, 18 Oct 2021 06:45:06 -0600 +Subject: [PATCH] nvme: move command clear into the various setup helpers +Git-commit: 9c3d29296fe4c297447d2055e7a9535c981a8370 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We don't have to worry about doing extra memsets by moving it outside +the protection of RQF_DONTPREP, as nvme doesn't do partial completions. + +This is in preparation for making the read/write fast path not do a full +memset of the command. 
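+
+This is safe to move outside the RQF_DONTPREP check because that flag only
+prevents re-preparing a requeued request, and since nvme never partially
+completes a request there is no state an extra clear could destroy. As a
+condensed sketch of the resulting shape (the full hunks follow), each
+opcode-specific setup helper now clears its own command:
+
+	static inline void nvme_setup_flush(struct nvme_ns *ns,
+			struct nvme_command *cmnd)
+	{
+		memset(cmnd, 0, sizeof(*cmnd));
+		cmnd->common.opcode = nvme_cmd_flush;
+		cmnd->common.nsid = cpu_to_le32(ns->head->ns_id);
+	}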
+ +Reviewed-by: Keith Busch +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/nvme/host/core.c | 10 +++++++--- + drivers/nvme/host/zns.c | 2 ++ + 2 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c +index 3109bdf137e4..824790bed2f5 100644 +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -834,6 +834,7 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl, + static inline void nvme_setup_flush(struct nvme_ns *ns, + struct nvme_command *cmnd) + { ++ memset(cmnd, 0, sizeof(*cmnd)); + cmnd->common.opcode = nvme_cmd_flush; + cmnd->common.nsid = cpu_to_le32(ns->head->ns_id); + } +@@ -885,6 +886,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, + return BLK_STS_IOERR; + } + ++ memset(cmnd, 0, sizeof(*cmnd)); + cmnd->dsm.opcode = nvme_cmd_dsm; + cmnd->dsm.nsid = cpu_to_le32(ns->head->ns_id); + cmnd->dsm.nr = cpu_to_le32(segments - 1); +@@ -901,6 +903,8 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, + static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, + struct request *req, struct nvme_command *cmnd) + { ++ memset(cmnd, 0, sizeof(*cmnd)); ++ + if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) + return nvme_setup_discard(ns, req, cmnd); + +@@ -925,6 +929,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, + u16 control = 0; + u32 dsmgmt = 0; + ++ memset(cmnd, 0, sizeof(*cmnd)); ++ + if (req->cmd_flags & REQ_FUA) + control |= NVME_RW_FUA; + if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD)) +@@ -993,10 +999,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req) + struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; + blk_status_t ret = BLK_STS_OK; + +- if (!(req->rq_flags & RQF_DONTPREP)) { ++ if (!(req->rq_flags & RQF_DONTPREP)) + nvme_clear_nvme_request(req); +- memset(cmd, 0, sizeof(*cmd)); +- } + + switch (req_op(req)) { + case REQ_OP_DRV_IN: +diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c +index d95010481fce..bfc259e0d7b8 100644 +--- a/drivers/nvme/host/zns.c ++++ b/drivers/nvme/host/zns.c +@@ -233,6 +233,8 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, + blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req, + struct nvme_command *c, enum nvme_zone_mgmt_action action) + { ++ memset(c, 0, sizeof(*c)); ++ + c->zms.opcode = nvme_cmd_zone_mgmt_send; + c->zms.nsid = cpu_to_le32(ns->head->ns_id); + c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); +-- +2.35.3 + diff --git a/patches.suse/nvme-multipath-enable-polled-I-O.patch b/patches.suse/nvme-multipath-enable-polled-I-O.patch new file mode 100644 index 0000000..85ea0c7 --- /dev/null +++ b/patches.suse/nvme-multipath-enable-polled-I-O.patch @@ -0,0 +1,57 @@ +From: Christoph Hellwig +Date: Tue, 12 Oct 2021 13:12:26 +0200 +Subject: [PATCH] nvme-multipath: enable polled I/O +Git-commit: c712dccc64357b94f93e57882373e1365f0e2a56 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Set the poll queue flag to enable polling, given that the multipath +node just dispatches the bios to a lower queue. 
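+
+With QUEUE_FLAG_POLL set on the multipath node, userspace can request polled
+completion against it just as against the per-path devices. A minimal usage
+sketch (the device path is only an example):
+
+	struct iovec iov = { .iov_base = buf, .iov_len = 4096 };
+	int fd = open("/dev/nvme0n1", O_RDONLY | O_DIRECT);
+
+	/* RWF_HIPRI asks the kernel to poll for this I/O's completion */
+	ssize_t ret = preadv2(fd, &iov, 1, 0, RWF_HIPRI);
+
+The failover hunk below also clears REQ_POLLED on requeued bios, as the
+polling state does not carry across a requeue to another path.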
+ +Signed-off-by: Christoph Hellwig +Tested-by: Mark Wunderlich +Link: https://lore.kernel.org/r/20211012111226.760968-17-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/nvme/host/multipath.c | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c +index ab78aa5d28c6..11440c86881e 100644 +--- a/drivers/nvme/host/multipath.c ++++ b/drivers/nvme/host/multipath.c +@@ -85,8 +85,13 @@ void nvme_failover_req(struct request *req) + } + + spin_lock_irqsave(&ns->head->requeue_lock, flags); +- for (bio = req->bio; bio; bio = bio->bi_next) ++ for (bio = req->bio; bio; bio = bio->bi_next) { + bio_set_dev(bio, ns->head->disk->part0); ++ if (bio->bi_opf & REQ_POLLED) { ++ bio->bi_opf &= ~REQ_POLLED; ++ bio->bi_cookie = BLK_QC_T_NONE; ++ } ++ } + blk_steal_bios(&ns->head->requeue_list, req); + spin_unlock_irqrestore(&ns->head->requeue_lock, flags); + +@@ -477,6 +482,15 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) + + blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue); + blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue); ++ /* ++ * This assumes all controllers that refer to a namespace either ++ * support poll queues or not. That is not a strict guarantee, ++ * but if the assumption is wrong the effect is only suboptimal ++ * performance but not correctness problem. ++ */ ++ if (ctrl->tagset->nr_maps > HCTX_TYPE_POLL && ++ ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues) ++ blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue); + + /* set to a default value of 512 until the disk is validated */ + blk_queue_logical_block_size(head->disk->queue, 512); +-- +2.35.3 + diff --git a/patches.suse/nvme-wire-up-completion-batching-for-the-IRQ-path.patch b/patches.suse/nvme-wire-up-completion-batching-for-the-IRQ-path.patch new file mode 100644 index 0000000..7c04188 --- /dev/null +++ b/patches.suse/nvme-wire-up-completion-batching-for-the-IRQ-path.patch @@ -0,0 +1,61 @@ +From: Jens Axboe +Date: Mon, 18 Oct 2021 08:45:39 -0600 +Subject: [PATCH] nvme: wire up completion batching for the IRQ path +Git-commit: 4f5022453acd0f7b28012e20b7d048470f129894 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Trivial to do now, just need our own io_comp_batch on the stack and pass +that in to the usual command completion handling. + +I pondered making this dependent on how many entries we had to process, +but even for a single entry there's no discernable difference in +performance or latency. Running a sync workload over io_uring: + +t/io_uring -b512 -d1 -s1 -c1 -p0 -F1 -B1 -n2 /dev/nvme1n1 /dev/nvme2n1 + +yields the below performance before the patch: + +IOPS=254820, BW=124MiB/s, IOS/call=1/1, inflight=(1 1) +IOPS=251174, BW=122MiB/s, IOS/call=1/1, inflight=(1 1) +IOPS=250806, BW=122MiB/s, IOS/call=1/1, inflight=(1 1) + +and the following after: + +IOPS=255972, BW=124MiB/s, IOS/call=1/1, inflight=(1 1) +IOPS=251920, BW=123MiB/s, IOS/call=1/1, inflight=(1 1) +IOPS=251794, BW=122MiB/s, IOS/call=1/1, inflight=(1 1) + +which definitely isn't slower, about the same if you factor in a bit of +variance. For peak performance workloads, benchmarking shows a 2% +improvement. 
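+
+For reference, the batch filled here is drained by nvme_pci_complete_batch()
+from the earlier patch in this series; condensed, that path is:
+
+	rq_list_for_each(&iob->req_list, req) {
+		nvme_pci_unmap_rq(req);		/* per-request DMA unmap */
+		nvme_complete_batch_req(req);
+	}
+	blk_mq_end_request_batch(iob);		/* end all requests in one call */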
+ +Reviewed-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/nvme/host/pci.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c +index 83d3503d5b88..ed684874842f 100644 +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -1076,9 +1076,13 @@ static inline int nvme_poll_cq(struct nvme_queue *nvmeq, + static irqreturn_t nvme_irq(int irq, void *data) + { + struct nvme_queue *nvmeq = data; ++ DEFINE_IO_COMP_BATCH(iob); + +- if (nvme_poll_cq(nvmeq, NULL)) ++ if (nvme_poll_cq(nvmeq, &iob)) { ++ if (!rq_list_empty(iob.req_list)) ++ nvme_pci_complete_batch(&iob); + return IRQ_HANDLED; ++ } + return IRQ_NONE; + } + +-- +2.35.3 + diff --git a/patches.suse/nvmet-use-bdev_nr_bytes-instead-of-open-coding-it.patch b/patches.suse/nvmet-use-bdev_nr_bytes-instead-of-open-coding-it.patch new file mode 100644 index 0000000..40d5eef --- /dev/null +++ b/patches.suse/nvmet-use-bdev_nr_bytes-instead-of-open-coding-it.patch @@ -0,0 +1,44 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:07 +0200 +Subject: [PATCH] nvmet: use bdev_nr_bytes instead of open coding it +Git-commit: c68f3ef777939ba70e167711754d5a27bfb6e51b +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Reviewed-by: Chaitanya Kulkarni +Link: https://lore.kernel.org/r/20211018101130.1838532-8-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/nvme/target/io-cmd-bdev.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c +index 6139e1de50a6..70ca9dfc1771 100644 +--- a/drivers/nvme/target/io-cmd-bdev.c ++++ b/drivers/nvme/target/io-cmd-bdev.c +@@ -87,7 +87,7 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns) + ns->bdev = NULL; + return ret; + } +- ns->size = i_size_read(ns->bdev->bd_inode); ++ ns->size = bdev_nr_bytes(ns->bdev); + ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); + + ns->pi_type = 0; +@@ -108,7 +108,7 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns) + + void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns) + { +- ns->size = i_size_read(ns->bdev->bd_inode); ++ ns->size = bdev_nr_bytes(ns->bdev); + } + + u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts) +-- +2.35.3 + diff --git a/patches.suse/oracleasm-reinstate-bio_map_user_iov-declaration-in-.patch b/patches.suse/oracleasm-reinstate-bio_map_user_iov-declaration-in-.patch index 51fdc2d..9cdb34c 100644 --- a/patches.suse/oracleasm-reinstate-bio_map_user_iov-declaration-in-.patch +++ b/patches.suse/oracleasm-reinstate-bio_map_user_iov-declaration-in-.patch @@ -41,11 +41,11 @@ Signed-off-by: Jessica Yu +struct request; extern int submit_bio_wait(struct bio *bio); - extern void bio_advance(struct bio *, unsigned); + extern void bio_init(struct bio *bio, struct bio_vec *table, @@ -473,6 +474,7 @@ void __bio_add_page(struct bio *bio, str - unsigned int len, unsigned int off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); - void bio_release_pages(struct bio *bio, bool mark_dirty); + void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter); + void __bio_release_pages(struct bio *bio, bool mark_dirty); +extern int bio_map_user_iov(struct request *, struct iov_iter *, gfp_t); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio 
*bio); diff --git a/patches.suse/page_pool-Add-callback-to-init-pages-when-they-are-a.patch b/patches.suse/page_pool-Add-callback-to-init-pages-when-they-are-a.patch new file mode 100644 index 0000000..551e49f --- /dev/null +++ b/patches.suse/page_pool-Add-callback-to-init-pages-when-they-are-a.patch @@ -0,0 +1,48 @@ +From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= +Date: Mon, 3 Jan 2022 16:08:07 +0100 +Subject: page_pool: Add callback to init pages when they are allocated +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 +Git-commit: 35b2e549894b7ef0b6e7f3a70c2ab75b767cfce9 +References: jsc#PED-1368 + +Add a new callback function to page_pool that, if set, will be called every +time a new page is allocated. This will be used from bpf_test_run() to +initialise the page data with the data provided by userspace when running +XDP programs with redirect turned on. + +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: Alexei Starovoitov +Acked-by: John Fastabend +Acked-by: Jesper Dangaard Brouer +Link: https://lore.kernel.org/bpf/20220103150812.87914-3-toke@redhat.com +Acked-by: Shung-Hsi Yu +--- + include/net/page_pool.h | 2 ++ + net/core/page_pool.c | 2 ++ + 2 files changed, 4 insertions(+) + +--- a/include/net/page_pool.h ++++ b/include/net/page_pool.h +@@ -80,6 +80,8 @@ struct page_pool_params { + enum dma_data_direction dma_dir; /* DMA mapping direction */ + unsigned int max_len; /* max DMA sync memory size */ + unsigned int offset; /* DMA addr offset */ ++ void (*init_callback)(struct page *page, void *arg); ++ void *init_arg; + }; + + struct page_pool { +--- a/net/core/page_pool.c ++++ b/net/core/page_pool.c +@@ -217,6 +217,8 @@ static void page_pool_set_pp_info(struct + { + page->pp = pool; + page->pp_magic |= PP_SIGNATURE; ++ if (pool->p.init_callback) ++ pool->p.init_callback(page, pool->p.init_arg); + } + + static void page_pool_clear_pp_info(struct page *page) diff --git a/patches.suse/page_pool-Store-the-XDP-mem-id.patch b/patches.suse/page_pool-Store-the-XDP-mem-id.patch new file mode 100644 index 0000000..32e1367 --- /dev/null +++ b/patches.suse/page_pool-Store-the-XDP-mem-id.patch @@ -0,0 +1,85 @@ +From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= +Date: Mon, 3 Jan 2022 16:08:08 +0100 +Subject: page_pool: Store the XDP mem id +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 +Git-commit: 64693ec7774e471f817a725686d93903e919a2e5 +References: jsc#PED-1368 + +Store the XDP mem ID inside the page_pool struct so it can be retrieved +later for use in bpf_prog_run(). 
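+
+The ID originates from the XDP memory-model registration path. When a driver
+registers its pool (sketch; the rxq naming is illustrative):
+
+	err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
+					 MEM_TYPE_PAGE_POOL, pool);
+
+__xdp_reg_mem_allocator() then hands the resulting xdp_mem_info to
+page_pool_use_xdp_mem(), which records mem->id in pool->xdp_mem_id (see the
+net/core/xdp.c hunk below).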
+ +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: Alexei Starovoitov +Acked-by: Jesper Dangaard Brouer +Link: https://lore.kernel.org/bpf/20220103150812.87914-4-toke@redhat.com +Acked-by: Shung-Hsi Yu +--- + include/net/page_pool.h | 9 +++++++-- + net/core/page_pool.c | 4 +++- + net/core/xdp.c | 2 +- + 3 files changed, 11 insertions(+), 4 deletions(-) + +--- a/include/net/page_pool.h ++++ b/include/net/page_pool.h +@@ -96,6 +96,7 @@ struct page_pool { + unsigned int frag_offset; + struct page *frag_page; + long frag_users; ++ u32 xdp_mem_id; + + /* + * Data structure for allocation side +@@ -170,9 +171,12 @@ bool page_pool_return_skb_page(struct pa + + struct page_pool *page_pool_create(const struct page_pool_params *params); + ++struct xdp_mem_info; ++ + #ifdef CONFIG_PAGE_POOL + void page_pool_destroy(struct page_pool *pool); +-void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)); ++void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), ++ struct xdp_mem_info *mem); + void page_pool_release_page(struct page_pool *pool, struct page *page); + void page_pool_put_page_bulk(struct page_pool *pool, void **data, + int count); +@@ -182,7 +186,8 @@ static inline void page_pool_destroy(str + } + + static inline void page_pool_use_xdp_mem(struct page_pool *pool, +- void (*disconnect)(void *)) ++ void (*disconnect)(void *), ++ struct xdp_mem_info *mem) + { + } + static inline void page_pool_release_page(struct page_pool *pool, +--- a/net/core/page_pool.c ++++ b/net/core/page_pool.c +@@ -693,10 +693,12 @@ static void page_pool_release_retry(stru + schedule_delayed_work(&pool->release_dw, DEFER_TIME); + } + +-void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)) ++void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), ++ struct xdp_mem_info *mem) + { + refcount_inc(&pool->user_cnt); + pool->disconnect = disconnect; ++ pool->xdp_mem_id = mem->id; + } + + void page_pool_destroy(struct page_pool *pool) +--- a/net/core/xdp.c ++++ b/net/core/xdp.c +@@ -322,7 +322,7 @@ static struct xdp_mem_allocator *__xdp_r + } + + if (type == MEM_TYPE_PAGE_POOL) +- page_pool_use_xdp_mem(allocator, mem_allocator_disconnect); ++ page_pool_use_xdp_mem(allocator, mem_allocator_disconnect, mem); + + mutex_unlock(&mem_id_lock); + diff --git a/patches.suse/partitions-efi-use-bdev_nr_bytes-instead-of-open-cod.patch b/patches.suse/partitions-efi-use-bdev_nr_bytes-instead-of-open-cod.patch new file mode 100644 index 0000000..cf13a83 --- /dev/null +++ b/patches.suse/partitions-efi-use-bdev_nr_bytes-instead-of-open-cod.patch @@ -0,0 +1,33 @@ +From: Christoph Hellwig +Date: Tue, 19 Oct 2021 08:20:23 +0200 +Subject: [PATCH] partitions/efi: use bdev_nr_bytes instead of open coding it +Git-commit: f9831b885709978ed9d16833ceeb3a2ec174a2d2 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. 
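+
+For context, the helper is essentially a named wrapper around the same inode
+read (sketch of its definition):
+
+	static inline loff_t bdev_nr_bytes(struct block_device *bdev)
+	{
+		return i_size_read(bdev->bd_inode);
+	}
+
+so the point of the conversion is one well-named accessor instead of
+open-coded i_size_read(bdev->bd_inode) calls spread across subsystems.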
+ +Signed-off-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20211019062024.2171074-3-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/partitions/efi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/partitions/efi.c b/block/partitions/efi.c +index 7ca5c4c374d4..5e9be13a56a8 100644 +--- a/block/partitions/efi.c ++++ b/block/partitions/efi.c +@@ -133,7 +133,7 @@ efi_crc32(const void *buf, unsigned long len) + */ + static u64 last_lba(struct gendisk *disk) + { +- return div_u64(disk->part0->bd_inode->i_size, ++ return div_u64(bdev_nr_bytes(disk->part0), + queue_logical_block_size(disk->queue)) - 1ULL; + } + +-- +2.35.3 + diff --git a/patches.suse/partitions-ibm-use-bdev_nr_sectors-instead-of-open-c.patch b/patches.suse/partitions-ibm-use-bdev_nr_sectors-instead-of-open-c.patch new file mode 100644 index 0000000..7ef44d1 --- /dev/null +++ b/patches.suse/partitions-ibm-use-bdev_nr_sectors-instead-of-open-c.patch @@ -0,0 +1,100 @@ +From: Christoph Hellwig +Date: Tue, 19 Oct 2021 08:20:24 +0200 +Subject: [PATCH] partitions/ibm: use bdev_nr_sectors instead of open coding it +Git-commit: 97eeb5fc14cc4b2091df8b841a07a1ac69f2d762 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size and switch various +places to pass the size in terms of sectors which is more practical. + +Signed-off-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20211019062024.2171074-4-hch@lst.de +[axboe: fix comment typo] +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + block/partitions/ibm.c | 19 ++++++++++--------- + 1 file changed, 10 insertions(+), 9 deletions(-) + +diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c +index 9bca396aef4a..403756dbd50d 100644 +--- a/block/partitions/ibm.c ++++ b/block/partitions/ibm.c +@@ -198,7 +198,7 @@ static int find_lnx1_partitions(struct parsed_partitions *state, + char name[], + union label_t *label, + sector_t labelsect, +- loff_t i_size, ++ sector_t nr_sectors, + dasd_information2_t *info) + { + loff_t offset, geo_size, size; +@@ -213,14 +213,14 @@ static int find_lnx1_partitions(struct parsed_partitions *state, + } else { + /* + * Formated w/o large volume support. If the sanity check +- * 'size based on geo == size based on i_size' is true, then ++ * 'size based on geo == size based on nr_sectors' is true, then + * we can safely assume that we know the formatted size of + * the disk, otherwise we need additional information + * that we can only get from a real DASD device. 
+ */ + geo_size = geo->cylinders * geo->heads + * geo->sectors * secperblk; +- size = i_size >> 9; ++ size = nr_sectors; + if (size != geo_size) { + if (!info) { + strlcat(state->pp_buf, "\n", PAGE_SIZE); +@@ -229,7 +229,7 @@ static int find_lnx1_partitions(struct parsed_partitions *state, + if (!strcmp(info->type, "ECKD")) + if (geo_size < size) + size = geo_size; +- /* else keep size based on i_size */ ++ /* else keep size based on nr_sectors */ + } + } + /* first and only partition starts in the first block after the label */ +@@ -293,7 +293,8 @@ int ibm_partition(struct parsed_partitions *state) + struct gendisk *disk = state->disk; + struct block_device *bdev = disk->part0; + int blocksize, res; +- loff_t i_size, offset, size; ++ loff_t offset, size; ++ sector_t nr_sectors; + dasd_information2_t *info; + struct hd_geometry *geo; + char type[5] = {0,}; +@@ -308,8 +309,8 @@ int ibm_partition(struct parsed_partitions *state) + blocksize = bdev_logical_block_size(bdev); + if (blocksize <= 0) + goto out_symbol; +- i_size = i_size_read(bdev->bd_inode); +- if (i_size == 0) ++ nr_sectors = bdev_nr_sectors(bdev); ++ if (nr_sectors == 0) + goto out_symbol; + info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL); + if (info == NULL) +@@ -336,7 +337,7 @@ int ibm_partition(struct parsed_partitions *state) + label); + } else if (!strncmp(type, "LNX1", 4)) { + res = find_lnx1_partitions(state, geo, blocksize, name, +- label, labelsect, i_size, ++ label, labelsect, nr_sectors, + info); + } else if (!strncmp(type, "CMS1", 4)) { + res = find_cms1_partitions(state, geo, blocksize, name, +@@ -353,7 +354,7 @@ int ibm_partition(struct parsed_partitions *state) + res = 1; + if (info->format == DASD_FORMAT_LDL) { + strlcat(state->pp_buf, "(nonl)", PAGE_SIZE); +- size = i_size >> 9; ++ size = nr_sectors; + offset = (info->label_block + 1) * (blocksize >> 9); + put_partition(state, 1, offset, size-offset); + strlcat(state->pp_buf, "\n", PAGE_SIZE); +-- +2.35.3 + diff --git a/patches.suse/pcd-add-error-handling-support-for-add_disk.patch b/patches.suse/pcd-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..c99754e --- /dev/null +++ b/patches.suse/pcd-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,40 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:01:07 -0700 +Subject: [PATCH] pcd: add error handling support for add_disk() +Git-commit: 4dfbd1390af60765774d9565858d1a6fadacde32 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. 
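+
+The conversion follows the same shape as the other add_disk() fixups in this
+series (sketch; the hunk below is authoritative):
+
+	ret = add_disk(cd->disk);
+	if (ret)
+		goto out_unreg_cdrom;	/* unwind everything set up beforehand */
+	return 0;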
+ +Signed-off-by: Luis Chamberlain +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/paride/pcd.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c +index 93ed63626232..a7fab3830d7b 100644 +--- a/drivers/block/paride/pcd.c ++++ b/drivers/block/paride/pcd.c +@@ -941,9 +941,13 @@ static int pcd_init_unit(struct pcd_unit *cd, bool autoprobe, int port, + cd->present = 1; + pcd_probe_capabilities(cd); + register_cdrom(cd->disk, &cd->info); +- add_disk(cd->disk); ++ ret = add_disk(cd->disk); ++ if (ret) ++ goto out_unreg_cdrom; + return 0; + ++out_unreg_cdrom: ++ unregister_cdrom(&cd->info); + out_pi_release: + pi_release(cd->pi); + out_free_disk: +-- +2.35.3 + diff --git a/patches.suse/pcd-capture-errors-on-cdrom_register.patch b/patches.suse/pcd-capture-errors-on-cdrom_register.patch new file mode 100644 index 0000000..0f6bd80 --- /dev/null +++ b/patches.suse/pcd-capture-errors-on-cdrom_register.patch @@ -0,0 +1,35 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:01:09 -0700 +Subject: [PATCH] pcd: capture errors on cdrom_register() +Git-commit: b6fa069971bc427e19b8f3882a808f24530994ed +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +No errors were being captured when cdrom_register() fails; +capture the error and return it. + +Signed-off-by: Luis Chamberlain +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/paride/pcd.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c +index 82a654fc4db8..4cc0d141db78 100644 +--- a/drivers/block/paride/pcd.c ++++ b/drivers/block/paride/pcd.c +@@ -940,7 +940,9 @@ static int pcd_init_unit(struct pcd_unit *cd, bool autoprobe, int port, + + cd->present = 1; + pcd_probe_capabilities(cd); +- register_cdrom(cd->disk, &cd->info); ++ ret = register_cdrom(cd->disk, &cd->info); ++ if (ret) ++ goto out_pi_release; + ret = add_disk(cd->disk); + if (ret) + goto out_unreg_cdrom; +-- +2.35.3 + diff --git a/patches.suse/pcd-cleanup-initialization.patch b/patches.suse/pcd-cleanup-initialization.patch new file mode 100644 index 0000000..9fa07ae --- /dev/null +++ b/patches.suse/pcd-cleanup-initialization.patch @@ -0,0 +1,370 @@ +From: Christoph Hellwig +Date: Mon, 27 Sep 2021 15:01:04 -0700 +Subject: [PATCH] pcd: cleanup initialization +Git-commit: af761f277b7fd896c27cb1100b25f11567987822 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Refactor the pcd initialization to have a dedicated helper to initialize +a single disk.
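+
+Condensed, the new per-unit helper pairs each setup step with a matching
+unwind label (sketch only; the full hunks follow):
+
+	static int pcd_init_unit(struct pcd_unit *cd, ...)
+	{
+		/* tag set, then disk, then parport attach, then probe */
+		...
+	out_pi_release:
+		pi_release(cd->pi);
+	out_free_disk:
+		blk_cleanup_disk(cd->disk);
+	out_free_tag_set:
+		blk_mq_free_tag_set(&cd->tag_set);
+		return ret;
+	}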
+ +Signed-off-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/paride/pcd.c | 286 ++++++++++++++++--------------------- + 1 file changed, 127 insertions(+), 159 deletions(-) + +diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c +index 8903fdaa2046..93ed63626232 100644 +--- a/drivers/block/paride/pcd.c ++++ b/drivers/block/paride/pcd.c +@@ -183,8 +183,6 @@ static int pcd_audio_ioctl(struct cdrom_device_info *cdi, + static int pcd_packet(struct cdrom_device_info *cdi, + struct packet_command *cgc); + +-static int pcd_detect(void); +-static void pcd_probe_capabilities(void); + static void do_pcd_read_drq(void); + static blk_status_t pcd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd); +@@ -302,53 +300,6 @@ static const struct blk_mq_ops pcd_mq_ops = { + .queue_rq = pcd_queue_rq, + }; + +-static void pcd_init_units(void) +-{ +- struct pcd_unit *cd; +- int unit; +- +- pcd_drive_count = 0; +- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { +- struct gendisk *disk; +- +- if (blk_mq_alloc_sq_tag_set(&cd->tag_set, &pcd_mq_ops, 1, +- BLK_MQ_F_SHOULD_MERGE)) +- continue; +- +- disk = blk_mq_alloc_disk(&cd->tag_set, cd); +- if (IS_ERR(disk)) { +- blk_mq_free_tag_set(&cd->tag_set); +- continue; +- } +- +- INIT_LIST_HEAD(&cd->rq_list); +- blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); +- cd->disk = disk; +- cd->pi = &cd->pia; +- cd->present = 0; +- cd->last_sense = 0; +- cd->changed = 1; +- cd->drive = (*drives[unit])[D_SLV]; +- if ((*drives[unit])[D_PRT]) +- pcd_drive_count++; +- +- cd->name = &cd->info.name[0]; +- snprintf(cd->name, sizeof(cd->info.name), "%s%d", name, unit); +- cd->info.ops = &pcd_dops; +- cd->info.handle = cd; +- cd->info.speed = 0; +- cd->info.capacity = 1; +- cd->info.mask = 0; +- disk->major = major; +- disk->first_minor = unit; +- disk->minors = 1; +- strcpy(disk->disk_name, cd->name); /* umm... 
*/ +- disk->fops = &pcd_bdops; +- disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; +- disk->events = DISK_EVENT_MEDIA_CHANGE; +- } +-} +- + static int pcd_open(struct cdrom_device_info *cdi, int purpose) + { + struct pcd_unit *cd = cdi->handle; +@@ -679,90 +630,31 @@ static int pcd_probe(struct pcd_unit *cd, int ms) + return -1; + } + +-static void pcd_probe_capabilities(void) ++static int pcd_probe_capabilities(struct pcd_unit *cd) + { +- int unit, r; +- char buffer[32]; + char cmd[12] = { 0x5a, 1 << 3, 0x2a, 0, 0, 0, 0, 18, 0, 0, 0, 0 }; +- struct pcd_unit *cd; +- +- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { +- if (!cd->present) +- continue; +- r = pcd_atapi(cd, cmd, 18, buffer, "mode sense capabilities"); +- if (r) +- continue; +- /* we should now have the cap page */ +- if ((buffer[11] & 1) == 0) +- cd->info.mask |= CDC_CD_R; +- if ((buffer[11] & 2) == 0) +- cd->info.mask |= CDC_CD_RW; +- if ((buffer[12] & 1) == 0) +- cd->info.mask |= CDC_PLAY_AUDIO; +- if ((buffer[14] & 1) == 0) +- cd->info.mask |= CDC_LOCK; +- if ((buffer[14] & 8) == 0) +- cd->info.mask |= CDC_OPEN_TRAY; +- if ((buffer[14] >> 6) == 0) +- cd->info.mask |= CDC_CLOSE_TRAY; +- } +-} +- +-static int pcd_detect(void) +-{ +- int k, unit; +- struct pcd_unit *cd; +- +- printk("%s: %s version %s, major %d, nice %d\n", +- name, name, PCD_VERSION, major, nice); ++ char buffer[32]; ++ int ret; + +- par_drv = pi_register_driver(name); +- if (!par_drv) { +- pr_err("failed to register %s driver\n", name); +- return -1; +- } ++ ret = pcd_atapi(cd, cmd, 18, buffer, "mode sense capabilities"); ++ if (ret) ++ return ret; ++ ++ /* we should now have the cap page */ ++ if ((buffer[11] & 1) == 0) ++ cd->info.mask |= CDC_CD_R; ++ if ((buffer[11] & 2) == 0) ++ cd->info.mask |= CDC_CD_RW; ++ if ((buffer[12] & 1) == 0) ++ cd->info.mask |= CDC_PLAY_AUDIO; ++ if ((buffer[14] & 1) == 0) ++ cd->info.mask |= CDC_LOCK; ++ if ((buffer[14] & 8) == 0) ++ cd->info.mask |= CDC_OPEN_TRAY; ++ if ((buffer[14] >> 6) == 0) ++ cd->info.mask |= CDC_CLOSE_TRAY; + +- k = 0; +- if (pcd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */ +- cd = pcd; +- if (cd->disk && pi_init(cd->pi, 1, -1, -1, -1, -1, -1, +- pcd_buffer, PI_PCD, verbose, cd->name)) { +- if (!pcd_probe(cd, -1)) { +- cd->present = 1; +- k++; +- } else +- pi_release(cd->pi); +- } +- } else { +- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { +- int *conf = *drives[unit]; +- if (!conf[D_PRT]) +- continue; +- if (!cd->disk) +- continue; +- if (!pi_init(cd->pi, 0, conf[D_PRT], conf[D_MOD], +- conf[D_UNI], conf[D_PRO], conf[D_DLY], +- pcd_buffer, PI_PCD, verbose, cd->name)) +- continue; +- if (!pcd_probe(cd, conf[D_SLV])) { +- cd->present = 1; +- k++; +- } else +- pi_release(cd->pi); +- } +- } +- if (k) +- return 0; +- +- printk("%s: No CD-ROM drive found\n", name); +- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { +- if (!cd->disk) +- continue; +- blk_cleanup_disk(cd->disk); +- blk_mq_free_tag_set(&cd->tag_set); +- } +- pi_unregister_driver(par_drv); +- return -1; ++ return 0; + } + + /* I/O request processing */ +@@ -999,43 +891,121 @@ static int pcd_get_mcn(struct cdrom_device_info *cdi, struct cdrom_mcn *mcn) + return 0; + } + ++static int pcd_init_unit(struct pcd_unit *cd, bool autoprobe, int port, ++ int mode, int unit, int protocol, int delay, int ms) ++{ ++ struct gendisk *disk; ++ int ret; ++ ++ ret = blk_mq_alloc_sq_tag_set(&cd->tag_set, &pcd_mq_ops, 1, ++ BLK_MQ_F_SHOULD_MERGE); ++ if (ret) ++ return ret; ++ ++ disk = 
blk_mq_alloc_disk(&cd->tag_set, cd); ++ if (IS_ERR(disk)) { ++ ret = PTR_ERR(disk); ++ goto out_free_tag_set; ++ } ++ ++ INIT_LIST_HEAD(&cd->rq_list); ++ blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); ++ cd->disk = disk; ++ cd->pi = &cd->pia; ++ cd->present = 0; ++ cd->last_sense = 0; ++ cd->changed = 1; ++ cd->drive = (*drives[cd - pcd])[D_SLV]; ++ ++ cd->name = &cd->info.name[0]; ++ snprintf(cd->name, sizeof(cd->info.name), "%s%d", name, unit); ++ cd->info.ops = &pcd_dops; ++ cd->info.handle = cd; ++ cd->info.speed = 0; ++ cd->info.capacity = 1; ++ cd->info.mask = 0; ++ disk->major = major; ++ disk->first_minor = unit; ++ disk->minors = 1; ++ strcpy(disk->disk_name, cd->name); /* umm... */ ++ disk->fops = &pcd_bdops; ++ disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; ++ disk->events = DISK_EVENT_MEDIA_CHANGE; ++ ++ if (!pi_init(cd->pi, autoprobe, port, mode, unit, protocol, delay, ++ pcd_buffer, PI_PCD, verbose, cd->name)) ++ goto out_free_disk; ++ if (pcd_probe(cd, ms)) ++ goto out_pi_release; ++ ++ cd->present = 1; ++ pcd_probe_capabilities(cd); ++ register_cdrom(cd->disk, &cd->info); ++ add_disk(cd->disk); ++ return 0; ++ ++out_pi_release: ++ pi_release(cd->pi); ++out_free_disk: ++ blk_cleanup_disk(cd->disk); ++out_free_tag_set: ++ blk_mq_free_tag_set(&cd->tag_set); ++ return ret; ++} ++ + static int __init pcd_init(void) + { +- struct pcd_unit *cd; +- int unit; ++ int found = 0, unit; + + if (disable) + return -EINVAL; + +- pcd_init_units(); ++ if (register_blkdev(major, name)) ++ return -EBUSY; + +- if (pcd_detect()) +- return -ENODEV; ++ pr_info("%s: %s version %s, major %d, nice %d\n", ++ name, name, PCD_VERSION, major, nice); + +- /* get the atapi capabilities page */ +- pcd_probe_capabilities(); ++ par_drv = pi_register_driver(name); ++ if (!par_drv) { ++ pr_err("failed to register %s driver\n", name); ++ goto out_unregister_blkdev; ++ } + +- if (register_blkdev(major, name)) { +- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { +- if (!cd->disk) +- continue; ++ for (unit = 0; unit < PCD_UNITS; unit++) { ++ if ((*drives[unit])[D_PRT]) ++ pcd_drive_count++; ++ } + +- blk_cleanup_queue(cd->disk->queue); +- blk_mq_free_tag_set(&cd->tag_set); +- put_disk(cd->disk); ++ if (pcd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */ ++ if (!pcd_init_unit(pcd, 1, -1, -1, -1, -1, -1, -1)) ++ found++; ++ } else { ++ for (unit = 0; unit < PCD_UNITS; unit++) { ++ struct pcd_unit *cd = &pcd[unit]; ++ int *conf = *drives[unit]; ++ ++ if (!conf[D_PRT]) ++ continue; ++ if (!pcd_init_unit(cd, 0, conf[D_PRT], conf[D_MOD], ++ conf[D_UNI], conf[D_PRO], conf[D_DLY], ++ conf[D_SLV])) ++ found++; + } +- return -EBUSY; + } + +- for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { +- if (cd->present) { +- register_cdrom(cd->disk, &cd->info); +- cd->disk->private_data = cd; +- add_disk(cd->disk); +- } ++ if (!found) { ++ pr_info("%s: No CD-ROM drive found\n", name); ++ goto out_unregister_pi_driver; + } + + return 0; ++ ++out_unregister_pi_driver: ++ pi_unregister_driver(par_drv); ++out_unregister_blkdev: ++ unregister_blkdev(major, name); ++ return -ENODEV; + } + + static void __exit pcd_exit(void) +@@ -1044,20 +1014,18 @@ static void __exit pcd_exit(void) + int unit; + + for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { +- if (!cd->disk) ++ if (!cd->present) + continue; + +- if (cd->present) { +- del_gendisk(cd->disk); +- pi_release(cd->pi); +- unregister_cdrom(&cd->info); +- } +- blk_cleanup_queue(cd->disk->queue); ++ del_gendisk(cd->disk); ++ 
pi_release(cd->pi); ++ unregister_cdrom(&cd->info); ++ blk_cleanup_disk(cd->disk); ++ + blk_mq_free_tag_set(&cd->tag_set); + } +- unregister_blkdev(major, name); + pi_unregister_driver(par_drv); ++ unregister_blkdev(major, name); + } + + MODULE_LICENSE("GPL"); +-- +2.35.3 + diff --git a/patches.suse/pcd-fix-error-codes-in-pcd_init_unit.patch b/patches.suse/pcd-fix-error-codes-in-pcd_init_unit.patch new file mode 100644 index 0000000..30ba7b3 --- /dev/null +++ b/patches.suse/pcd-fix-error-codes-in-pcd_init_unit.patch @@ -0,0 +1,59 @@ +From: Dan Carpenter +Date: Fri, 1 Oct 2021 15:26:23 +0300 +Subject: [PATCH] pcd: fix error codes in pcd_init_unit() +Git-commit: d0ac7a30e41174c794fbfa53ea986d9555e5b9f4 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Return -ENODEV on these error paths instead of returning success. + +Fixes: af761f277b7f ("pcd: cleanup initialization") +Signed-off-by: Dan Carpenter +Link: https://lore.kernel.org/r/20211001122623.GA2283@kili +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/paride/pcd.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c +index 4cc0d141db78..f6b1d63e96e1 100644 +--- a/drivers/block/paride/pcd.c ++++ b/drivers/block/paride/pcd.c +@@ -613,8 +613,7 @@ static int pcd_identify(struct pcd_unit *cd) + } + + /* +- * returns 0, with id set if drive is detected +- * -1, if drive detection failed ++ * returns 0, with id set if drive is detected, otherwise an error code. + */ + static int pcd_probe(struct pcd_unit *cd, int ms) + { +@@ -627,7 +626,7 @@ static int pcd_probe(struct pcd_unit *cd, int ms) + if (!pcd_reset(cd) && !pcd_identify(cd)) + return 0; + } +- return -1; ++ return -ENODEV; + } + + static int pcd_probe_capabilities(struct pcd_unit *cd) +@@ -933,9 +932,12 @@ static int pcd_init_unit(struct pcd_unit *cd, bool autoprobe, int port, + disk->events = DISK_EVENT_MEDIA_CHANGE; + + if (!pi_init(cd->pi, autoprobe, port, mode, unit, protocol, delay, +- pcd_buffer, PI_PCD, verbose, cd->name)) ++ pcd_buffer, PI_PCD, verbose, cd->name)) { ++ ret = -ENODEV; + goto out_free_disk; +- if (pcd_probe(cd, ms)) ++ } ++ ret = pcd_probe(cd, ms); ++ if (ret) + goto out_pi_release; + + cd->present = 1; +-- +2.35.3 + diff --git a/patches.suse/pcd-fix-ordering-of-unregister_cdrom.patch b/patches.suse/pcd-fix-ordering-of-unregister_cdrom.patch new file mode 100644 index 0000000..bb36a06 --- /dev/null +++ b/patches.suse/pcd-fix-ordering-of-unregister_cdrom.patch @@ -0,0 +1,36 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:01:08 -0700 +Subject: [PATCH] pcd: fix ordering of unregister_cdrom() +Git-commit: 2b6cabce3954be3341e0fe7b20a27902821fd3dd +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We first register cdrom and then we add_disk() and +so we should likewise unregister the cdrom first and +then del_gendisk().
+ +Signed-off-by: Luis Chamberlain +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/paride/pcd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c +index a7fab3830d7b..82a654fc4db8 100644 +--- a/drivers/block/paride/pcd.c ++++ b/drivers/block/paride/pcd.c +@@ -1021,9 +1021,9 @@ static void __exit pcd_exit(void) + if (!cd->present) + continue; + ++ unregister_cdrom(&cd->info); + del_gendisk(cd->disk); + pi_release(cd->pi); +- unregister_cdrom(&cd->info); + blk_cleanup_disk(cd->disk); + + blk_mq_free_tag_set(&cd->tag_set); +-- +2.35.3 + diff --git a/patches.suse/pcd-move-the-identify-buffer-into-pcd_identify.patch b/patches.suse/pcd-move-the-identify-buffer-into-pcd_identify.patch new file mode 100644 index 0000000..ba3c401 --- /dev/null +++ b/patches.suse/pcd-move-the-identify-buffer-into-pcd_identify.patch @@ -0,0 +1,82 @@ +From: Christoph Hellwig +Date: Mon, 27 Sep 2021 15:01:03 -0700 +Subject: [PATCH] pcd: move the identify buffer into pcd_identify +Git-commit: 7d8b72aaddd3ec5f350d3e9988d6735a7b9b18e9 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +No need to pass it through a bunch of functions. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/paride/pcd.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c +index f9cdd11f02f5..8903fdaa2046 100644 +--- a/drivers/block/paride/pcd.c ++++ b/drivers/block/paride/pcd.c +@@ -630,10 +630,11 @@ static int pcd_drive_status(struct cdrom_device_info *cdi, int slot_nr) + return CDS_DISC_OK; + } + +-static int pcd_identify(struct pcd_unit *cd, char *id) ++static int pcd_identify(struct pcd_unit *cd) + { +- int k, s; + char id_cmd[12] = { 0x12, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0 }; ++ char id[18]; ++ int k, s; + + pcd_bufblk = -1; + +@@ -664,15 +665,15 @@ static int pcd_identify(struct pcd_unit *cd, char *id) + * returns 0, with id set if drive is detected + * -1, if drive detection failed + */ +-static int pcd_probe(struct pcd_unit *cd, int ms, char *id) ++static int pcd_probe(struct pcd_unit *cd, int ms) + { + if (ms == -1) { + for (cd->drive = 0; cd->drive <= 1; cd->drive++) +- if (!pcd_reset(cd) && !pcd_identify(cd, id)) ++ if (!pcd_reset(cd) && !pcd_identify(cd)) + return 0; + } else { + cd->drive = ms; +- if (!pcd_reset(cd) && !pcd_identify(cd, id)) ++ if (!pcd_reset(cd) && !pcd_identify(cd)) + return 0; + } + return -1; +@@ -709,7 +710,6 @@ static void pcd_probe_capabilities(void) + + static int pcd_detect(void) + { +- char id[18]; + int k, unit; + struct pcd_unit *cd; + +@@ -727,7 +727,7 @@ static int pcd_detect(void) + cd = pcd; + if (cd->disk && pi_init(cd->pi, 1, -1, -1, -1, -1, -1, + pcd_buffer, PI_PCD, verbose, cd->name)) { +- if (!pcd_probe(cd, -1, id)) { ++ if (!pcd_probe(cd, -1)) { + cd->present = 1; + k++; + } else +@@ -744,7 +744,7 @@ static int pcd_detect(void) + conf[D_UNI], conf[D_PRO], conf[D_DLY], + pcd_buffer, PI_PCD, verbose, cd->name)) + continue; +- if (!pcd_probe(cd, conf[D_SLV], id)) { ++ if (!pcd_probe(cd, conf[D_SLV])) { + cd->present = 1; + k++; + } else +-- +2.35.3 + diff --git a/patches.suse/pci-hv-Use-vmbus_requestor-to-generate-transaction-i.patch b/patches.suse/pci-hv-Use-vmbus_requestor-to-generate-transaction-i.patch index aa62b0d..477062d 100644 --- a/patches.suse/pci-hv-Use-vmbus_requestor-to-generate-transaction-i.patch +++ 
b/patches.suse/pci-hv-Use-vmbus_requestor-to-generate-transaction-i.patch @@ -3,7 +3,7 @@ From: "Andrea Parri (Microsoft)" Date: Tue, 19 Apr 2022 14:23:21 +0200 Subject: [PATCH] PCI: hv: Use vmbus_requestor to generate transaction IDs for VMbus hardening -References: bsc#1204017 +References: bsc#1204017, bsc#1205617 Git-commit: de5ddb7d44347ad8b00533c1850a4e2e636a1ce9 Patch-mainline: v5.19-rc1 diff --git a/patches.suse/pci-hv-fix-synchronization-between-channel-callback-.patch b/patches.suse/pci-hv-fix-synchronization-between-channel-callback-.patch index ce468da..55a8209 100644 --- a/patches.suse/pci-hv-fix-synchronization-between-channel-callback-.patch +++ b/patches.suse/pci-hv-fix-synchronization-between-channel-callback-.patch @@ -3,7 +3,7 @@ From: "Andrea Parri (Microsoft)" Date: Tue, 19 Apr 2022 14:23:25 +0200 Subject: [PATCH] PCI: hv: Fix synchronization between channel callback and hv_compose_msi_msg() -References: bsc#1204017 +References: bsc#1204017, bsc#1205617 Git-commit: a765ed47e45166451680ee9af2b9e435c82ec3ba Patch-mainline: v5.19-rc1 diff --git a/patches.suse/pci-hv-fix-synchronization-between-channel-callback-hv_pci_bus_exit.patch b/patches.suse/pci-hv-fix-synchronization-between-channel-callback-hv_pci_bus_exit.patch index 2515e6c..ca95c0f 100644 --- a/patches.suse/pci-hv-fix-synchronization-between-channel-callback-hv_pci_bus_exit.patch +++ b/patches.suse/pci-hv-fix-synchronization-between-channel-callback-hv_pci_bus_exit.patch @@ -3,7 +3,7 @@ From: "Andrea Parri (Microsoft)" Date: Thu, 12 May 2022 00:32:07 +0200 Subject: [PATCH] PCI: hv: Fix synchronization between channel callback and hv_pci_bus_exit() -References: bsc#1204017 +References: bsc#1204017, bsc#1205617 Git-commit: b4927bd272623694314f37823302f9d67aa5964c Patch-mainline: v5.19-rc1 diff --git a/patches.suse/pcmcia-hide-the-MAC-address-helpers-if-NET.patch b/patches.suse/pcmcia-hide-the-MAC-address-helpers-if-NET.patch new file mode 100644 index 0000000..b1f1898 --- /dev/null +++ b/patches.suse/pcmcia-hide-the-MAC-address-helpers-if-NET.patch @@ -0,0 +1,43 @@ +From bd4b827cec1d4eec7a916dc4da0ca65939f80f4b Mon Sep 17 00:00:00 2001 +From: Jakub Kicinski +Date: Sat, 20 Nov 2021 09:15:10 -0800 +Subject: [PATCH] pcmcia: hide the MAC address helpers if !NET +Git-commit: bd4b827cec1d4eec7a916dc4da0ca65939f80f4b +Patch-mainline: v5.17-rc1 +References: git-fixes + +pcmcia_get_mac_from_cis is only called from networking and +recent changes made it call dev_addr_mod() which is itself +only defined if NET. + +Reported-by: kernel test robot +Fixes: adeef3e32146 ("net: constify netdev->dev_addr") +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Acked-by: Takashi Iwai + +--- + drivers/pcmcia/pcmcia_cis.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/pcmcia/pcmcia_cis.c b/drivers/pcmcia/pcmcia_cis.c +index f650e19a315c..6bc0bc24d357 100644 +--- a/drivers/pcmcia/pcmcia_cis.c ++++ b/drivers/pcmcia/pcmcia_cis.c +@@ -386,7 +386,7 @@ size_t pcmcia_get_tuple(struct pcmcia_device *p_dev, cisdata_t code, + } + EXPORT_SYMBOL(pcmcia_get_tuple); + +- ++#ifdef CONFIG_NET + /* + * pcmcia_do_get_mac() - internal helper for pcmcia_get_mac_from_cis() + * +@@ -431,3 +431,4 @@ int pcmcia_get_mac_from_cis(struct pcmcia_device *p_dev, struct net_device *dev) + } + EXPORT_SYMBOL(pcmcia_get_mac_from_cis); + ++#endif /* CONFIG_NET */ +-- +2.35.3 + diff --git a/patches.suse/pd-add-error-handling-support-for-add_disk.patch b/patches.suse/pd-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..25989b0 --- /dev/null +++ b/patches.suse/pd-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,39 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:01:10 -0700 +Subject: [PATCH] pd: add error handling support for add_disk() +Git-commit: 3dfdd5f333bf16ec5057d508a574a3302ed84cfa +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. + +Signed-off-by: Luis Chamberlain +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/paride/pd.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c +index 500b89a4bdaf..e59759bcf416 100644 +--- a/drivers/block/paride/pd.c ++++ b/drivers/block/paride/pd.c +@@ -938,8 +938,12 @@ static int pd_probe_drive(struct pd_unit *disk, int autoprobe, int port, + if (ret) + goto put_disk; + set_capacity(disk->gd, disk->capacity); +- add_disk(disk->gd); ++ ret = add_disk(disk->gd); ++ if (ret) ++ goto cleanup_disk; + return 0; ++cleanup_disk: ++ blk_cleanup_disk(disk->gd); + put_disk: + put_disk(p); + disk->gd = NULL; +-- +2.35.3 + diff --git a/patches.suse/pd-cleanup-initialization.patch b/patches.suse/pd-cleanup-initialization.patch new file mode 100644 index 0000000..10101a8 --- /dev/null +++ b/patches.suse/pd-cleanup-initialization.patch @@ -0,0 +1,222 @@ +From: Christoph Hellwig +Date: Mon, 27 Sep 2021 15:01:06 -0700 +Subject: [PATCH] pd: cleanup initialization +Git-commit: 1ad392add59c2a7cc3148b2e3041696370b05e5b +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Refactor the pd initialization to have a dedicated helper to initialize +a single disk.
+ +Signed-off-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/paride/pd.c | 140 +++++++++++++++++++------------------- + 1 file changed, 70 insertions(+), 70 deletions(-) + +diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c +index 675327df6aff..500b89a4bdaf 100644 +--- a/drivers/block/paride/pd.c ++++ b/drivers/block/paride/pd.c +@@ -875,9 +875,27 @@ static const struct blk_mq_ops pd_mq_ops = { + .queue_rq = pd_queue_rq, + }; + +-static void pd_probe_drive(struct pd_unit *disk) ++static int pd_probe_drive(struct pd_unit *disk, int autoprobe, int port, ++ int mode, int unit, int protocol, int delay) + { ++ int index = disk - pd; ++ int *parm = *drives[index]; + struct gendisk *p; ++ int ret; ++ ++ disk->pi = &disk->pia; ++ disk->access = 0; ++ disk->changed = 1; ++ disk->capacity = 0; ++ disk->drive = parm[D_SLV]; ++ snprintf(disk->name, PD_NAMELEN, "%s%c", name, 'a' + index); ++ disk->alt_geom = parm[D_GEO]; ++ disk->standby = parm[D_SBY]; ++ INIT_LIST_HEAD(&disk->rq_list); ++ ++ if (!pi_init(disk->pi, autoprobe, port, mode, unit, protocol, delay, ++ pd_scratch, PI_PD, verbose, disk->name)) ++ return -ENXIO; + + memset(&disk->tag_set, 0, sizeof(disk->tag_set)); + disk->tag_set.ops = &pd_mq_ops; +@@ -887,14 +905,14 @@ static void pd_probe_drive(struct pd_unit *disk) + disk->tag_set.queue_depth = 2; + disk->tag_set.numa_node = NUMA_NO_NODE; + disk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; +- +- if (blk_mq_alloc_tag_set(&disk->tag_set)) +- return; ++ ret = blk_mq_alloc_tag_set(&disk->tag_set); ++ if (ret) ++ goto pi_release; + + p = blk_mq_alloc_disk(&disk->tag_set, disk); + if (IS_ERR(p)) { +- blk_mq_free_tag_set(&disk->tag_set); +- return; ++ ret = PTR_ERR(p); ++ goto free_tag_set; + } + disk->gd = p; + +@@ -905,102 +923,84 @@ static void pd_probe_drive(struct pd_unit *disk) + p->minors = 1 << PD_BITS; + p->events = DISK_EVENT_MEDIA_CHANGE; + p->private_data = disk; +- + blk_queue_max_hw_sectors(p->queue, cluster); + blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH); + + if (disk->drive == -1) { +- for (disk->drive = 0; disk->drive <= 1; disk->drive++) +- if (pd_special_command(disk, pd_identify) == 0) +- return; +- } else if (pd_special_command(disk, pd_identify) == 0) +- return; +- disk->gd = NULL; ++ for (disk->drive = 0; disk->drive <= 1; disk->drive++) { ++ ret = pd_special_command(disk, pd_identify); ++ if (ret == 0) ++ break; ++ } ++ } else { ++ ret = pd_special_command(disk, pd_identify); ++ } ++ if (ret) ++ goto put_disk; ++ set_capacity(disk->gd, disk->capacity); ++ add_disk(disk->gd); ++ return 0; ++put_disk: + put_disk(p); ++ disk->gd = NULL; ++free_tag_set: ++ blk_mq_free_tag_set(&disk->tag_set); ++pi_release: ++ pi_release(disk->pi); ++ return ret; + } + +-static int pd_detect(void) ++static int __init pd_init(void) + { + int found = 0, unit, pd_drive_count = 0; + struct pd_unit *disk; + +- for (unit = 0; unit < PD_UNITS; unit++) { +- int *parm = *drives[unit]; +- struct pd_unit *disk = pd + unit; +- disk->pi = &disk->pia; +- disk->access = 0; +- disk->changed = 1; +- disk->capacity = 0; +- disk->drive = parm[D_SLV]; +- snprintf(disk->name, PD_NAMELEN, "%s%c", name, 'a'+unit); +- disk->alt_geom = parm[D_GEO]; +- disk->standby = parm[D_SBY]; +- if (parm[D_PRT]) +- pd_drive_count++; +- INIT_LIST_HEAD(&disk->rq_list); +- } ++ if (disable) ++ return -ENODEV; ++ ++ if (register_blkdev(major, name)) ++ return -ENODEV; ++ ++ printk("%s: %s version %s, major %d, cluster %d, nice %d\n", ++ name, name, 
PD_VERSION, major, cluster, nice); + + par_drv = pi_register_driver(name); + if (!par_drv) { + pr_err("failed to register %s driver\n", name); +- return -1; ++ goto out_unregister_blkdev; + } + +- if (pd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */ +- disk = pd; +- if (pi_init(disk->pi, 1, -1, -1, -1, -1, -1, pd_scratch, +- PI_PD, verbose, disk->name)) { +- pd_probe_drive(disk); +- if (!disk->gd) +- pi_release(disk->pi); +- } ++ for (unit = 0; unit < PD_UNITS; unit++) { ++ int *parm = *drives[unit]; + ++ if (parm[D_PRT]) ++ pd_drive_count++; ++ } ++ ++ if (pd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */ ++ if (!pd_probe_drive(pd, 1, -1, -1, -1, -1, -1)) ++ found++; + } else { + for (unit = 0, disk = pd; unit < PD_UNITS; unit++, disk++) { + int *parm = *drives[unit]; + if (!parm[D_PRT]) + continue; +- if (pi_init(disk->pi, 0, parm[D_PRT], parm[D_MOD], +- parm[D_UNI], parm[D_PRO], parm[D_DLY], +- pd_scratch, PI_PD, verbose, disk->name)) { +- pd_probe_drive(disk); +- if (!disk->gd) +- pi_release(disk->pi); +- } +- } +- } +- for (unit = 0, disk = pd; unit < PD_UNITS; unit++, disk++) { +- if (disk->gd) { +- set_capacity(disk->gd, disk->capacity); +- add_disk(disk->gd); +- found = 1; ++ if (!pd_probe_drive(disk, 0, parm[D_PRT], parm[D_MOD], ++ parm[D_UNI], parm[D_PRO], parm[D_DLY])) ++ found++; + } + } + if (!found) { + printk("%s: no valid drive found\n", name); +- pi_unregister_driver(par_drv); ++ goto out_pi_unregister_driver; + } +- return found; +-} +- +-static int __init pd_init(void) +-{ +- if (disable) +- goto out1; +- +- if (register_blkdev(major, name)) +- goto out1; +- +- printk("%s: %s version %s, major %d, cluster %d, nice %d\n", +- name, name, PD_VERSION, major, cluster, nice); +- if (!pd_detect()) +- goto out2; + + return 0; + +-out2: ++out_pi_unregister_driver: ++ pi_unregister_driver(par_drv); ++out_unregister_blkdev: + unregister_blkdev(major, name); +-out1: + return -ENODEV; + } + +-- +2.35.3 + diff --git a/patches.suse/percpu_ref-percpu_ref_tryget_live-version-holding-RC.patch b/patches.suse/percpu_ref-percpu_ref_tryget_live-version-holding-RC.patch new file mode 100644 index 0000000..74f575b --- /dev/null +++ b/patches.suse/percpu_ref-percpu_ref_tryget_live-version-holding-RC.patch @@ -0,0 +1,78 @@ +From: Pavel Begunkov +Date: Thu, 21 Oct 2021 14:30:51 +0100 +Subject: [PATCH] percpu_ref: percpu_ref_tryget_live() version holding RCU +Git-commit: 3b13c168186c115501ee7d194460ba2f8c825155 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Add percpu_ref_tryget_live_rcu(), which is a version of +percpu_ref_tryget_live() but the user is responsible for enclosing it in +a RCU read lock section. + +Signed-off-by: Pavel Begunkov +Acked-by: Dennis Zhou +Link: https://lore.kernel.org/r/3066500d7a6eb3e03f10adf98b87fdb3b1c49db8.1634822969.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + include/linux/percpu-refcount.h | 33 +++++++++++++++++++++++---------- + 1 file changed, 23 insertions(+), 10 deletions(-) + +diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h +index ae16a9856305..b31d3f3312ce 100644 +--- a/include/linux/percpu-refcount.h ++++ b/include/linux/percpu-refcount.h +@@ -266,6 +266,28 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) + return percpu_ref_tryget_many(ref, 1); + } + ++/** ++ * percpu_ref_tryget_live_rcu - same as percpu_ref_tryget_live() but the ++ * caller is responsible for taking RCU. 
++ * ++ * This function is safe to call as long as @ref is between init and exit. ++ */ ++static inline bool percpu_ref_tryget_live_rcu(struct percpu_ref *ref) ++{ ++ unsigned long __percpu *percpu_count; ++ bool ret = false; ++ ++ WARN_ON_ONCE(!rcu_read_lock_held()); ++ ++ if (likely(__ref_is_percpu(ref, &percpu_count))) { ++ this_cpu_inc(*percpu_count); ++ ret = true; ++ } else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) { ++ ret = atomic_long_inc_not_zero(&ref->data->count); ++ } ++ return ret; ++} ++ + /** + * percpu_ref_tryget_live - try to increment a live percpu refcount + * @ref: percpu_ref to try-get +@@ -283,20 +305,11 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) + */ + static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) + { +- unsigned long __percpu *percpu_count; + bool ret = false; + + rcu_read_lock(); +- +- if (__ref_is_percpu(ref, &percpu_count)) { +- this_cpu_inc(*percpu_count); +- ret = true; +- } else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) { +- ret = atomic_long_inc_not_zero(&ref->data->count); +- } +- ++ ret = percpu_ref_tryget_live_rcu(ref); + rcu_read_unlock(); +- + return ret; + } + +-- +2.35.3 + diff --git a/patches.suse/pf-add-error-handling-support-for-add_disk.patch b/patches.suse/pf-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..7402115 --- /dev/null +++ b/patches.suse/pf-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,36 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:01:56 -0700 +Subject: [PATCH] pf: add error handling support for add_disk() +Git-commit: 4fac63f8a871bc2e38dce4944c9d964a62bac3e6 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. + +Signed-off-by: Luis Chamberlain +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/paride/pf.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c +index f471d48a87bc..380d80e507c7 100644 +--- a/drivers/block/paride/pf.c ++++ b/drivers/block/paride/pf.c +@@ -962,7 +962,9 @@ static int __init pf_init_unit(struct pf_unit *pf, bool autoprobe, int port, + if (pf_probe(pf)) + goto out_pi_release; + +- add_disk(disk); ++ ret = add_disk(disk); ++ if (ret) ++ goto out_pi_release; + pf->present = 1; + return 0; + +-- +2.35.3 + diff --git a/patches.suse/pf-cleanup-initialization.patch b/patches.suse/pf-cleanup-initialization.patch new file mode 100644 index 0000000..99eb592 --- /dev/null +++ b/patches.suse/pf-cleanup-initialization.patch @@ -0,0 +1,299 @@ +From: Christoph Hellwig +Date: Mon, 27 Sep 2021 15:01:05 -0700 +Subject: [PATCH] pf: cleanup initialization +Git-commit: fb367e6baeb0789d3f272a9aa21f5116e8ebd9dc +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Refactor the pf initialization to have a dedicated helper to initialize +a single disk. 
+ +Signed-off-by: Christoph Hellwig +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/paride/pf.c | 223 +++++++++++++++++--------------------- + 1 file changed, 99 insertions(+), 124 deletions(-) + +diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c +index d5b9c88ba76f..f471d48a87bc 100644 +--- a/drivers/block/paride/pf.c ++++ b/drivers/block/paride/pf.c +@@ -214,7 +214,6 @@ static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo); + + static void pf_release(struct gendisk *disk, fmode_t mode); + +-static int pf_detect(void); + static void do_pf_read(void); + static void do_pf_read_start(void); + static void do_pf_write(void); +@@ -285,45 +284,6 @@ static const struct blk_mq_ops pf_mq_ops = { + .queue_rq = pf_queue_rq, + }; + +-static void __init pf_init_units(void) +-{ +- struct pf_unit *pf; +- int unit; +- +- pf_drive_count = 0; +- for (unit = 0, pf = units; unit < PF_UNITS; unit++, pf++) { +- struct gendisk *disk; +- +- if (blk_mq_alloc_sq_tag_set(&pf->tag_set, &pf_mq_ops, 1, +- BLK_MQ_F_SHOULD_MERGE)) +- continue; +- +- disk = blk_mq_alloc_disk(&pf->tag_set, pf); +- if (IS_ERR(disk)) { +- blk_mq_free_tag_set(&pf->tag_set); +- continue; +- } +- +- INIT_LIST_HEAD(&pf->rq_list); +- blk_queue_max_segments(disk->queue, cluster); +- blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); +- pf->disk = disk; +- pf->pi = &pf->pia; +- pf->media_status = PF_NM; +- pf->drive = (*drives[unit])[D_SLV]; +- pf->lun = (*drives[unit])[D_LUN]; +- snprintf(pf->name, PF_NAMELEN, "%s%d", name, unit); +- disk->major = major; +- disk->first_minor = unit; +- disk->minors = 1; +- strcpy(disk->disk_name, pf->name); +- disk->fops = &pf_fops; +- disk->events = DISK_EVENT_MEDIA_CHANGE; +- if (!(*drives[unit])[D_PRT]) +- pf_drive_count++; +- } +-} +- + static int pf_open(struct block_device *bdev, fmode_t mode) + { + struct pf_unit *pf = bdev->bd_disk->private_data; +@@ -718,59 +678,6 @@ static int pf_probe(struct pf_unit *pf) + return -1; + } + +-static int pf_detect(void) +-{ +- struct pf_unit *pf = units; +- int k, unit; +- +- printk("%s: %s version %s, major %d, cluster %d, nice %d\n", +- name, name, PF_VERSION, major, cluster, nice); +- +- par_drv = pi_register_driver(name); +- if (!par_drv) { +- pr_err("failed to register %s driver\n", name); +- return -1; +- } +- k = 0; +- if (pf_drive_count == 0) { +- if (pi_init(pf->pi, 1, -1, -1, -1, -1, -1, pf_scratch, PI_PF, +- verbose, pf->name)) { +- if (!pf_probe(pf) && pf->disk) { +- pf->present = 1; +- k++; +- } else +- pi_release(pf->pi); +- } +- +- } else +- for (unit = 0; unit < PF_UNITS; unit++, pf++) { +- int *conf = *drives[unit]; +- if (!conf[D_PRT]) +- continue; +- if (pi_init(pf->pi, 0, conf[D_PRT], conf[D_MOD], +- conf[D_UNI], conf[D_PRO], conf[D_DLY], +- pf_scratch, PI_PF, verbose, pf->name)) { +- if (pf->disk && !pf_probe(pf)) { +- pf->present = 1; +- k++; +- } else +- pi_release(pf->pi); +- } +- } +- if (k) +- return 0; +- +- printk("%s: No ATAPI disk detected\n", name); +- for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { +- if (!pf->disk) +- continue; +- blk_cleanup_disk(pf->disk); +- blk_mq_free_tag_set(&pf->tag_set); +- } +- pi_unregister_driver(par_drv); +- return -1; +-} +- + /* The i/o request engine */ + + static int pf_start(struct pf_unit *pf, int cmd, int b, int c) +@@ -1014,61 +921,129 @@ static void do_pf_write_done(void) + next_request(0); + } + ++static int __init pf_init_unit(struct pf_unit *pf, bool autoprobe, int port, ++ int mode, int unit, int protocol, int delay, int 
ms) ++{ ++ struct gendisk *disk; ++ int ret; ++ ++ ret = blk_mq_alloc_sq_tag_set(&pf->tag_set, &pf_mq_ops, 1, ++ BLK_MQ_F_SHOULD_MERGE); ++ if (ret) ++ return ret; ++ ++ disk = blk_mq_alloc_disk(&pf->tag_set, pf); ++ if (IS_ERR(disk)) { ++ ret = PTR_ERR(disk); ++ goto out_free_tag_set; ++ } ++ disk->major = major; ++ disk->first_minor = pf - units; ++ disk->minors = 1; ++ strcpy(disk->disk_name, pf->name); ++ disk->fops = &pf_fops; ++ disk->events = DISK_EVENT_MEDIA_CHANGE; ++ disk->private_data = pf; ++ ++ blk_queue_max_segments(disk->queue, cluster); ++ blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); ++ ++ INIT_LIST_HEAD(&pf->rq_list); ++ pf->disk = disk; ++ pf->pi = &pf->pia; ++ pf->media_status = PF_NM; ++ pf->drive = (*drives[disk->first_minor])[D_SLV]; ++ pf->lun = (*drives[disk->first_minor])[D_LUN]; ++ snprintf(pf->name, PF_NAMELEN, "%s%d", name, disk->first_minor); ++ ++ if (!pi_init(pf->pi, autoprobe, port, mode, unit, protocol, delay, ++ pf_scratch, PI_PF, verbose, pf->name)) ++ goto out_free_disk; ++ if (pf_probe(pf)) ++ goto out_pi_release; ++ ++ add_disk(disk); ++ pf->present = 1; ++ return 0; ++ ++out_pi_release: ++ pi_release(pf->pi); ++out_free_disk: ++ blk_cleanup_disk(pf->disk); ++out_free_tag_set: ++ blk_mq_free_tag_set(&pf->tag_set); ++ return ret; ++} ++ + static int __init pf_init(void) + { /* preliminary initialisation */ + struct pf_unit *pf; +- int unit; ++ int found = 0, unit; + + if (disable) + return -EINVAL; + +- pf_init_units(); ++ if (register_blkdev(major, name)) ++ return -EBUSY; + +- if (pf_detect()) +- return -ENODEV; +- pf_busy = 0; ++ printk("%s: %s version %s, major %d, cluster %d, nice %d\n", ++ name, name, PF_VERSION, major, cluster, nice); + +- if (register_blkdev(major, name)) { +- for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { +- if (!pf->disk) +- continue; +- blk_cleanup_queue(pf->disk->queue); +- blk_mq_free_tag_set(&pf->tag_set); +- put_disk(pf->disk); +- } +- return -EBUSY; ++ par_drv = pi_register_driver(name); ++ if (!par_drv) { ++ pr_err("failed to register %s driver\n", name); ++ goto out_unregister_blkdev; + } + +- for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { +- struct gendisk *disk = pf->disk; ++ for (unit = 0; unit < PF_UNITS; unit++) { ++ if (!(*drives[unit])[D_PRT]) ++ pf_drive_count++; ++ } + +- if (!pf->present) +- continue; +- disk->private_data = pf; +- add_disk(disk); ++ pf = units; ++ if (pf_drive_count == 0) { ++ if (pf_init_unit(pf, 1, -1, -1, -1, -1, -1, verbose)) ++ found++; ++ } else { ++ for (unit = 0; unit < PF_UNITS; unit++, pf++) { ++ int *conf = *drives[unit]; ++ if (!conf[D_PRT]) ++ continue; ++ if (pf_init_unit(pf, 0, conf[D_PRT], conf[D_MOD], ++ conf[D_UNI], conf[D_PRO], conf[D_DLY], ++ verbose)) ++ found++; ++ } ++ } ++ if (!found) { ++ printk("%s: No ATAPI disk detected\n", name); ++ goto out_unregister_pi_driver; + } ++ pf_busy = 0; + return 0; ++ ++out_unregister_pi_driver: ++ pi_unregister_driver(par_drv); ++out_unregister_blkdev: ++ unregister_blkdev(major, name); ++ return -ENODEV; + } + + static void __exit pf_exit(void) + { + struct pf_unit *pf; + int unit; +- unregister_blkdev(major, name); ++ + for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { +- if (!pf->disk) ++ if (!pf->present) + continue; +- +- if (pf->present) +- del_gendisk(pf->disk); +- +- blk_cleanup_queue(pf->disk->queue); ++ del_gendisk(pf->disk); ++ blk_cleanup_disk(pf->disk); + blk_mq_free_tag_set(&pf->tag_set); +- put_disk(pf->disk); +- +- if (pf->present) +- pi_release(pf->pi); ++ 
pi_release(pf->pi); + } ++ ++ unregister_blkdev(major, name); + } + + MODULE_LICENSE("GPL"); +-- +2.35.3 + diff --git a/patches.suse/pf-fix-error-codes-in-pf_init_unit.patch b/patches.suse/pf-fix-error-codes-in-pf_init_unit.patch new file mode 100644 index 0000000..2adee5b --- /dev/null +++ b/patches.suse/pf-fix-error-codes-in-pf_init_unit.patch @@ -0,0 +1,62 @@ +From: Dan Carpenter +Date: Fri, 1 Oct 2021 15:26:54 +0300 +Subject: [PATCH] pf: fix error codes in pf_init_unit() +Git-commit: cfc03eabda8224c681087a4c6c51d1cc595ebfaf +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Return a negative error code instead of success on these error paths. + +Fixes: fb367e6baeb0 ("pf: cleanup initialization") +Signed-off-by: Dan Carpenter +Link: https://lore.kernel.org/r/20211001122654.GB2283@kili +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/paride/pf.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c +index 380d80e507c7..bf8d0ef41a0a 100644 +--- a/drivers/block/paride/pf.c ++++ b/drivers/block/paride/pf.c +@@ -651,9 +651,9 @@ static int pf_identify(struct pf_unit *pf) + return 0; + } + +-/* returns 0, with id set if drive is detected +- -1, if drive detection failed +-*/ ++/* ++ * returns 0, with id set if drive is detected, otherwise an error code. ++ */ + static int pf_probe(struct pf_unit *pf) + { + if (pf->drive == -1) { +@@ -675,7 +675,7 @@ static int pf_probe(struct pf_unit *pf) + if (!pf_identify(pf)) + return 0; + } +- return -1; ++ return -ENODEV; + } + + /* The i/o request engine */ +@@ -957,9 +957,12 @@ static int __init pf_init_unit(struct pf_unit *pf, bool autoprobe, int port, + snprintf(pf->name, PF_NAMELEN, "%s%d", name, disk->first_minor); + + if (!pi_init(pf->pi, autoprobe, port, mode, unit, protocol, delay, +- pf_scratch, PI_PF, verbose, pf->name)) ++ pf_scratch, PI_PF, verbose, pf->name)) { ++ ret = -ENODEV; + goto out_free_disk; +- if (pf_probe(pf)) ++ } ++ ret = pf_probe(pf); ++ if (ret) + goto out_pi_release; + + ret = add_disk(disk); +-- +2.35.3 + diff --git a/patches.suse/pktcdvd-add-error-handling-support-for-add_disk.patch b/patches.suse/pktcdvd-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..479abd1 --- /dev/null +++ b/patches.suse/pktcdvd-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,39 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:01:49 -0700 +Subject: [PATCH] pktcdvd: add error handling support for add_disk() +Git-commit: 7b505627568c088b364705a86234fa1f2beb01b9 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. + +The out_mem2 error label already does what we need so +re-use that. 
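+
+As a minimal sketch of the resulting call-site pattern (illustrative
+only, the hunk below is authoritative):
+
+	ret = add_disk(disk);
+	if (ret)
+		goto out_mem2;	/* unwind the earlier allocations */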
+
+Signed-off-by: Luis Chamberlain
+Signed-off-by: Jens Axboe
+Acked-by: Hannes Reinecke
+---
+ drivers/block/pktcdvd.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
+index cb52cce6fb03..e48d4771d4c1 100644
+--- a/drivers/block/pktcdvd.c
++++ b/drivers/block/pktcdvd.c
+@@ -2728,7 +2728,9 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
+ 	/* inherit events of the host device */
+ 	disk->events = pd->bdev->bd_disk->events;
+ 
+-	add_disk(disk);
++	ret = add_disk(disk);
++	if (ret)
++		goto out_mem2;
+ 
+ 	pkt_sysfs_dev_new(pd);
+ 	pkt_debugfs_dev_new(pd);
+-- 
+2.35.3
+
diff --git a/patches.suse/platform-chrome-Split-trace-include-file.patch b/patches.suse/platform-chrome-Split-trace-include-file.patch
new file mode 100644
index 0000000..b82bd3c
--- /dev/null
+++ b/patches.suse/platform-chrome-Split-trace-include-file.patch
@@ -0,0 +1,314 @@
+From eabd9a3807e17e211690e6c40f1405b427b64c48 Mon Sep 17 00:00:00 2001
+From: Gwendal Grignou
+Date: Fri, 21 Jan 2022 16:13:01 -0800
+Subject: [PATCH] platform: chrome: Split trace include file
+Git-commit: eabd9a3807e17e211690e6c40f1405b427b64c48
+References: git-fixes
+Patch-mainline: v5.18-rc1
+
+cros_ec_trace.h defined 5 tracing events, 2 for cros_ec_proto and
+3 for cros_ec_sensorhub_ring.
+These 2 files are in different kernel modules, so the traces are defined
+twice in the kernel, which leads to problems when enabling only some of
+the traces.
+
+Move the sensorhub traces from cros_ec_trace.h to cros_ec_sensorhub_trace.h
+and enable them only in the cros_ec_sensorhub kernel module.
+
+Check that we can now enable any single trace: without this patch,
+we can only enable all sensorhub traces or none.
+
+Fixes: d453ceb6549a ("platform/chrome: sensorhub: Add trace events for sample")
+
+Signed-off-by: Gwendal Grignou
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220122001301.640337-1-gwendal@chromium.org
+Signed-off-by: Benson Leung
+Signed-off-by: Oliver Neukum
+---
+ drivers/platform/chrome/Makefile | 3 +-
+ .../platform/chrome/cros_ec_sensorhub_ring.c | 3 +-
+ .../platform/chrome/cros_ec_sensorhub_trace.h | 123 ++++++++++++++++++
+ drivers/platform/chrome/cros_ec_trace.h | 95 --------------
+ 4 files changed, 127 insertions(+), 97 deletions(-)
+ create mode 100644 drivers/platform/chrome/cros_ec_sensorhub_trace.h
+
+diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
+index f901d2e43166..88cbc434c06b 100644
+--- a/drivers/platform/chrome/Makefile
++++ b/drivers/platform/chrome/Makefile
+@@ -2,6 +2,7 @@
+ 
+ # tell define_trace.h where to find the cros ec trace header
+ CFLAGS_cros_ec_trace.o:= -I$(src)
++CFLAGS_cros_ec_sensorhub_ring.o:= -I$(src)
+ 
+ obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o
+ obj-$(CONFIG_CHROMEOS_PSTORE) += chromeos_pstore.o
+@@ -20,7 +21,7 @@ obj-$(CONFIG_CROS_EC_CHARDEV) += cros_ec_chardev.o
+ obj-$(CONFIG_CROS_EC_LIGHTBAR) += cros_ec_lightbar.o
+ obj-$(CONFIG_CROS_EC_VBC) += cros_ec_vbc.o
+ obj-$(CONFIG_CROS_EC_DEBUGFS) += cros_ec_debugfs.o
+-cros-ec-sensorhub-objs := cros_ec_sensorhub.o cros_ec_sensorhub_ring.o cros_ec_trace.o
++cros-ec-sensorhub-objs := cros_ec_sensorhub.o cros_ec_sensorhub_ring.o
+ obj-$(CONFIG_CROS_EC_SENSORHUB) += cros-ec-sensorhub.o
+ obj-$(CONFIG_CROS_EC_SYSFS) += cros_ec_sysfs.o
+ obj-$(CONFIG_CROS_USBPD_LOGGER) += cros_usbpd_logger.o
+diff --git a/drivers/platform/chrome/cros_ec_sensorhub_ring.c b/drivers/platform/chrome/cros_ec_sensorhub_ring.c
+index 98e37080f760..71948dade0e2 100644
+--- 
a/drivers/platform/chrome/cros_ec_sensorhub_ring.c ++++ b/drivers/platform/chrome/cros_ec_sensorhub_ring.c +@@ -17,7 +17,8 @@ + #include + #include + +-#include "cros_ec_trace.h" ++#define CREATE_TRACE_POINTS ++#include "cros_ec_sensorhub_trace.h" + + /* Precision of fixed point for the m values from the filter */ + #define M_PRECISION BIT(23) +diff --git a/drivers/platform/chrome/cros_ec_sensorhub_trace.h b/drivers/platform/chrome/cros_ec_sensorhub_trace.h +new file mode 100644 +index 000000000000..57d9b4785969 +--- /dev/null ++++ b/drivers/platform/chrome/cros_ec_sensorhub_trace.h +@@ -0,0 +1,123 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Trace events for the ChromeOS Sensorhub kernel module ++ * ++ * Copyright 2021 Google LLC. ++ */ ++ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM cros_ec ++ ++#if !defined(_CROS_EC_SENSORHUB_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) ++#define _CROS_EC_SENSORHUB_TRACE_H_ ++ ++#include ++#include ++ ++#include ++ ++TRACE_EVENT(cros_ec_sensorhub_timestamp, ++ TP_PROTO(u32 ec_sample_timestamp, u32 ec_fifo_timestamp, s64 fifo_timestamp, ++ s64 current_timestamp, s64 current_time), ++ TP_ARGS(ec_sample_timestamp, ec_fifo_timestamp, fifo_timestamp, current_timestamp, ++ current_time), ++ TP_STRUCT__entry( ++ __field(u32, ec_sample_timestamp) ++ __field(u32, ec_fifo_timestamp) ++ __field(s64, fifo_timestamp) ++ __field(s64, current_timestamp) ++ __field(s64, current_time) ++ __field(s64, delta) ++ ), ++ TP_fast_assign( ++ __entry->ec_sample_timestamp = ec_sample_timestamp; ++ __entry->ec_fifo_timestamp = ec_fifo_timestamp; ++ __entry->fifo_timestamp = fifo_timestamp; ++ __entry->current_timestamp = current_timestamp; ++ __entry->current_time = current_time; ++ __entry->delta = current_timestamp - current_time; ++ ), ++ TP_printk("ec_ts: %9u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", ++ __entry->ec_sample_timestamp, ++ __entry->ec_fifo_timestamp, ++ __entry->fifo_timestamp, ++ __entry->current_timestamp, ++ __entry->current_time, ++ __entry->delta ++ ) ++); ++ ++TRACE_EVENT(cros_ec_sensorhub_data, ++ TP_PROTO(u32 ec_sensor_num, u32 ec_fifo_timestamp, s64 fifo_timestamp, ++ s64 current_timestamp, s64 current_time), ++ TP_ARGS(ec_sensor_num, ec_fifo_timestamp, fifo_timestamp, current_timestamp, current_time), ++ TP_STRUCT__entry( ++ __field(u32, ec_sensor_num) ++ __field(u32, ec_fifo_timestamp) ++ __field(s64, fifo_timestamp) ++ __field(s64, current_timestamp) ++ __field(s64, current_time) ++ __field(s64, delta) ++ ), ++ TP_fast_assign( ++ __entry->ec_sensor_num = ec_sensor_num; ++ __entry->ec_fifo_timestamp = ec_fifo_timestamp; ++ __entry->fifo_timestamp = fifo_timestamp; ++ __entry->current_timestamp = current_timestamp; ++ __entry->current_time = current_time; ++ __entry->delta = current_timestamp - current_time; ++ ), ++ TP_printk("ec_num: %4u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", ++ __entry->ec_sensor_num, ++ __entry->ec_fifo_timestamp, ++ __entry->fifo_timestamp, ++ __entry->current_timestamp, ++ __entry->current_time, ++ __entry->delta ++ ) ++); ++ ++TRACE_EVENT(cros_ec_sensorhub_filter, ++ TP_PROTO(struct cros_ec_sensors_ts_filter_state *state, s64 dx, s64 dy), ++ TP_ARGS(state, dx, dy), ++ TP_STRUCT__entry( ++ __field(s64, dx) ++ __field(s64, dy) ++ __field(s64, median_m) ++ __field(s64, median_error) ++ __field(s64, history_len) ++ __field(s64, x) ++ __field(s64, y) ++ ), ++ TP_fast_assign( ++ __entry->dx = dx; ++ __entry->dy = dy; ++ 
__entry->median_m = state->median_m; ++ __entry->median_error = state->median_error; ++ __entry->history_len = state->history_len; ++ __entry->x = state->x_offset; ++ __entry->y = state->y_offset; ++ ), ++ TP_printk("dx: %12lld. dy: %12lld median_m: %12lld median_error: %12lld len: %lld x: %12lld y: %12lld", ++ __entry->dx, ++ __entry->dy, ++ __entry->median_m, ++ __entry->median_error, ++ __entry->history_len, ++ __entry->x, ++ __entry->y ++ ) ++); ++ ++ ++#endif /* _CROS_EC_SENSORHUB_TRACE_H_ */ ++ ++/* this part must be outside header guard */ ++ ++#undef TRACE_INCLUDE_PATH ++#define TRACE_INCLUDE_PATH . ++ ++#undef TRACE_INCLUDE_FILE ++#define TRACE_INCLUDE_FILE cros_ec_sensorhub_trace ++ ++#include +diff --git a/drivers/platform/chrome/cros_ec_trace.h b/drivers/platform/chrome/cros_ec_trace.h +index 7e7cfc98657a..9bb5cd2c98b8 100644 +--- a/drivers/platform/chrome/cros_ec_trace.h ++++ b/drivers/platform/chrome/cros_ec_trace.h +@@ -15,7 +15,6 @@ + #include + #include + #include +-#include + + #include + +@@ -71,100 +70,6 @@ TRACE_EVENT(cros_ec_request_done, + __entry->retval) + ); + +-TRACE_EVENT(cros_ec_sensorhub_timestamp, +- TP_PROTO(u32 ec_sample_timestamp, u32 ec_fifo_timestamp, s64 fifo_timestamp, +- s64 current_timestamp, s64 current_time), +- TP_ARGS(ec_sample_timestamp, ec_fifo_timestamp, fifo_timestamp, current_timestamp, +- current_time), +- TP_STRUCT__entry( +- __field(u32, ec_sample_timestamp) +- __field(u32, ec_fifo_timestamp) +- __field(s64, fifo_timestamp) +- __field(s64, current_timestamp) +- __field(s64, current_time) +- __field(s64, delta) +- ), +- TP_fast_assign( +- __entry->ec_sample_timestamp = ec_sample_timestamp; +- __entry->ec_fifo_timestamp = ec_fifo_timestamp; +- __entry->fifo_timestamp = fifo_timestamp; +- __entry->current_timestamp = current_timestamp; +- __entry->current_time = current_time; +- __entry->delta = current_timestamp - current_time; +- ), +- TP_printk("ec_ts: %9u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", +- __entry->ec_sample_timestamp, +- __entry->ec_fifo_timestamp, +- __entry->fifo_timestamp, +- __entry->current_timestamp, +- __entry->current_time, +- __entry->delta +- ) +-); +- +-TRACE_EVENT(cros_ec_sensorhub_data, +- TP_PROTO(u32 ec_sensor_num, u32 ec_fifo_timestamp, s64 fifo_timestamp, +- s64 current_timestamp, s64 current_time), +- TP_ARGS(ec_sensor_num, ec_fifo_timestamp, fifo_timestamp, current_timestamp, current_time), +- TP_STRUCT__entry( +- __field(u32, ec_sensor_num) +- __field(u32, ec_fifo_timestamp) +- __field(s64, fifo_timestamp) +- __field(s64, current_timestamp) +- __field(s64, current_time) +- __field(s64, delta) +- ), +- TP_fast_assign( +- __entry->ec_sensor_num = ec_sensor_num; +- __entry->ec_fifo_timestamp = ec_fifo_timestamp; +- __entry->fifo_timestamp = fifo_timestamp; +- __entry->current_timestamp = current_timestamp; +- __entry->current_time = current_time; +- __entry->delta = current_timestamp - current_time; +- ), +- TP_printk("ec_num: %4u, ec_fifo_ts: %9u, fifo_ts: %12lld, curr_ts: %12lld, curr_time: %12lld, delta %12lld", +- __entry->ec_sensor_num, +- __entry->ec_fifo_timestamp, +- __entry->fifo_timestamp, +- __entry->current_timestamp, +- __entry->current_time, +- __entry->delta +- ) +-); +- +-TRACE_EVENT(cros_ec_sensorhub_filter, +- TP_PROTO(struct cros_ec_sensors_ts_filter_state *state, s64 dx, s64 dy), +- TP_ARGS(state, dx, dy), +- TP_STRUCT__entry( +- __field(s64, dx) +- __field(s64, dy) +- __field(s64, median_m) +- __field(s64, median_error) +- __field(s64, 
history_len)
+-		__field(s64, x)
+-		__field(s64, y)
+-	),
+-	TP_fast_assign(
+-		__entry->dx = dx;
+-		__entry->dy = dy;
+-		__entry->median_m = state->median_m;
+-		__entry->median_error = state->median_error;
+-		__entry->history_len = state->history_len;
+-		__entry->x = state->x_offset;
+-		__entry->y = state->y_offset;
+-	),
+-	TP_printk("dx: %12lld. dy: %12lld median_m: %12lld median_error: %12lld len: %lld x: %12lld y: %12lld",
+-		  __entry->dx,
+-		  __entry->dy,
+-		  __entry->median_m,
+-		  __entry->median_error,
+-		  __entry->history_len,
+-		  __entry->x,
+-		  __entry->y
+-	)
+-);
+-
+-
+ #endif /* _CROS_EC_TRACE_H_ */
+ 
+ /* this part must be outside header guard */
+-- 
+2.35.3
+
diff --git a/patches.suse/platform-x86-intel-hid-add-quirk-to-support-Surface-.patch b/patches.suse/platform-x86-intel-hid-add-quirk-to-support-Surface-.patch
new file mode 100644
index 0000000..746e166
--- /dev/null
+++ b/patches.suse/platform-x86-intel-hid-add-quirk-to-support-Surface-.patch
@@ -0,0 +1,43 @@
+From 7d0c009043f6a970f62dbf5aecda9f8c3ccafcff Mon Sep 17 00:00:00 2001
+From: Alex Hung
+Date: Fri, 3 Dec 2021 14:28:10 -0700
+Subject: [PATCH] platform/x86/intel: hid: add quirk to support Surface Go 3
+Git-commit: 7d0c009043f6a970f62dbf5aecda9f8c3ccafcff
+References: git-fixes
+Patch-mainline: v5.16-rc5
+
+Similar to other systems, the Surface Go 3 requires a DMI quirk to enable
+the 5-button array for the power and volume buttons.
+
+Buglink: https://github.com/linux-surface/linux-surface/issues/595
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Alex Hung
+Link: https://lore.kernel.org/r/20211203212810.2666508-1-alex.hung@canonical.com
+Signed-off-by: Hans de Goede
+Signed-off-by: Oliver Neukum
+---
+ drivers/platform/x86/intel/hid.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c
+index 08598942a6d7..13f8cf70b9ae 100644
+--- a/drivers/platform/x86/intel-hid.c
++++ b/drivers/platform/x86/intel-hid.c
+@@ -99,6 +99,13 @@ static const struct dmi_system_id button_array_table[] = {
+ 			DMI_MATCH(DMI_PRODUCT_FAMILY, "ThinkPad X1 Tablet Gen 2"),
+ 		},
+ 	},
++	{
++		.ident = "Microsoft Surface Go 3",
++		.matches = {
++			DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
++			DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"),
++		},
++	},
+ 	{ }
+ };
+ 
+-- 
+2.35.3
+
diff --git a/patches.suse/powerpc-64-Fix-build-failure-with-allyesconfig-in-bo.patch b/patches.suse/powerpc-64-Fix-build-failure-with-allyesconfig-in-bo.patch
new file mode 100644
index 0000000..6b9da46
--- /dev/null
+++ b/patches.suse/powerpc-64-Fix-build-failure-with-allyesconfig-in-bo.patch
@@ -0,0 +1,55 @@
+From af41d2866f7d75bbb38d487f6ec7770425d70e45 Mon Sep 17 00:00:00 2001
+From: Christophe Leroy
+Date: Sun, 27 Mar 2022 09:32:26 +0200
+Subject: [PATCH] powerpc/64: Fix build failure with allyesconfig in
+ book3s_64_entry.S
+
+References: bsc#1194869
+Patch-mainline: v5.18-rc2
+Git-commit: af41d2866f7d75bbb38d487f6ec7770425d70e45
+
+Using conditional branches between two files is hazardous, as they may
+get linked too far from each other.
+
+    arch/powerpc/kvm/book3s_64_entry.o:(.text+0x3ec): relocation truncated
+    to fit: R_PPC64_REL14 (stub) against symbol `system_reset_common'
+    defined in .text section in arch/powerpc/kernel/head_64.o
+
+Reorganise the code to use unconditional branches.
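+
+To illustrate why the branch type matters (a simplified sketch, not the
+actual hunk): a conditional branch encodes a 14-bit word displacement
+(R_PPC64_REL14, about +/-32 KiB of reach), while an unconditional branch
+encodes a 24-bit word displacement (R_PPC64_REL24, about +/-32 MiB), so
+the far cross-file target is reached through a nearby local label:
+
+	beq	.Lcall_system_reset_common	/* short reach, stays local */
+	...
+.Lcall_system_reset_common:
+	b	system_reset_common		/* long reach, may cross files */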
+ +Fixes: 89d35b239101 ("KVM: PPC: Book3S HV P9: Implement the rest of the P9 path in C") +Signed-off-by: Christophe Leroy +[mpe: Avoid odd-looking bne ., use named local labels] +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/89cf27bf43ee07a0b2879b9e8e2f5cd6386a3645.1648366338.git.christophe.leroy@csgroup.eu +Acked-by: Michal Suchanek +--- + arch/powerpc/kvm/book3s_64_entry.S | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S +index 05e003eb5d90..e42d1c609e47 100644 +--- a/arch/powerpc/kvm/book3s_64_entry.S ++++ b/arch/powerpc/kvm/book3s_64_entry.S +@@ -414,10 +414,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) + */ + ld r10,HSTATE_SCRATCH0(r13) + cmpwi r10,BOOK3S_INTERRUPT_MACHINE_CHECK +- beq machine_check_common ++ beq .Lcall_machine_check_common + + cmpwi r10,BOOK3S_INTERRUPT_SYSTEM_RESET +- beq system_reset_common ++ beq .Lcall_system_reset_common + + b . ++ ++.Lcall_machine_check_common: ++ b machine_check_common ++ ++.Lcall_system_reset_common: ++ b system_reset_common + #endif +-- +2.35.3 + diff --git a/patches.suse/powerpc-boot-Explicitly-disable-usage-of-SPE-instruc.patch b/patches.suse/powerpc-boot-Explicitly-disable-usage-of-SPE-instruc.patch new file mode 100644 index 0000000..62a053a --- /dev/null +++ b/patches.suse/powerpc-boot-Explicitly-disable-usage-of-SPE-instruc.patch @@ -0,0 +1,40 @@ +From 110a58b9f91c66f743c01a2c217243d94c899c23 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Pali=20Roh=C3=A1r?= +Date: Sat, 27 Aug 2022 15:44:54 +0200 +Subject: [PATCH] powerpc/boot: Explicitly disable usage of SPE instructions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +References: bsc#1156395 +Patch-mainline: v6.1-rc1 +Git-commit: 110a58b9f91c66f743c01a2c217243d94c899c23 + +uImage boot wrapper should not use SPE instructions, like kernel itself. +Boot wrapper has already disabled Altivec and VSX instructions but not SPE. +Options -mno-spe and -mspe=no already set when compilation of kernel, but +not when compiling uImage wrapper yet. Fix it. 
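+
+For illustration (a sketch of the mechanism, not the literal hunk),
+wrapping the flags in cc-option means each one is passed only when the
+compiler accepts it, so toolchains without SPE support keep building:
+
+	BOOTCFLAGS += $(call cc-option,-mno-spe) $(call cc-option,-mspe=no)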
+ +Cc: stable@vger.kernel.org +Signed-off-by: Pali Rohár +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20220827134454.17365-1-pali@kernel.org +Acked-by: Michal Suchanek +--- + arch/powerpc/boot/Makefile | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile +--- a/arch/powerpc/boot/Makefile ++++ b/arch/powerpc/boot/Makefile +@@ -34,6 +34,7 @@ endif + + BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ + -fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \ ++ $(call cc-option,-mno-spe) $(call cc-option,-mspe=no) \ + -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ + -include $(srctree)/include/linux/compiler_attributes.h \ + $(LINUXINCLUDE) +-- +2.35.3 + diff --git a/patches.suse/powerpc-kvm-Fix-kvm_use_magic_page.patch b/patches.suse/powerpc-kvm-Fix-kvm_use_magic_page.patch new file mode 100644 index 0000000..dd108fb --- /dev/null +++ b/patches.suse/powerpc-kvm-Fix-kvm_use_magic_page.patch @@ -0,0 +1,37 @@ +From 0c8eb2884a42d992c7726539328b7d3568f22143 Mon Sep 17 00:00:00 2001 +From: Andreas Gruenbacher +Date: Mon, 2 Aug 2021 13:46:19 +0200 +Subject: [PATCH] powerpc/kvm: Fix kvm_use_magic_page + +References: bsc#1156395 +Patch-mainline: v5.16-rc1 +Git-commit: 0c8eb2884a42d992c7726539328b7d3568f22143 + +When switching from __get_user to fault_in_pages_readable, commit +9f9eae5ce717 broke kvm_use_magic_page: like __get_user, +fault_in_pages_readable returns 0 on success. + +Fixes: 9f9eae5ce717 ("powerpc/kvm: Prefer fault_in_pages_readable function") +Cc: stable@vger.kernel.org # v4.18+ +Signed-off-by: Andreas Gruenbacher +Acked-by: Michal Suchanek +--- + arch/powerpc/kernel/kvm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c +index 617eba82531c..d89cf802d9aa 100644 +--- a/arch/powerpc/kernel/kvm.c ++++ b/arch/powerpc/kernel/kvm.c +@@ -669,7 +669,7 @@ static void __init kvm_use_magic_page(void) + on_each_cpu(kvm_map_magic_page, &features, 1); + + /* Quick self-test to see if the mapping works */ +- if (!fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) { ++ if (fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) { + kvm_patching_worked = false; + return; + } +-- +2.35.3 + diff --git a/patches.suse/powerpc-ppc-opcode-introduce-PPC_RAW_BRANCH-macro.patch b/patches.suse/powerpc-ppc-opcode-introduce-PPC_RAW_BRANCH-macro.patch new file mode 100644 index 0000000..b4f6662 --- /dev/null +++ b/patches.suse/powerpc-ppc-opcode-introduce-PPC_RAW_BRANCH-macro.patch @@ -0,0 +1,42 @@ +From: Hari Bathini +Date: Tue, 12 Oct 2021 18:00:52 +0530 +Subject: powerpc/ppc-opcode: introduce PPC_RAW_BRANCH() macro +Patch-mainline: v5.17-rc1 +Git-commit: f15a71b3880bf07b40810644e5ac6f177c2a7c8f +References: jsc#PED-1368 + +Define and use PPC_RAW_BRANCH() macro instead of open coding it. This +macro is used while adding BPF_PROBE_MEM support. 
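+
+For reference, the change amounts to the following pattern
+(PPC_INST_BRANCH is the 0x48000000 opcode of an unconditional "b"):
+
+	/* open coded encoding at every call site ... */
+	EMIT(PPC_INST_BRANCH | (offset & 0x03fffffc));
+	/* ... becomes */
+	EMIT(PPC_RAW_BRANCH(offset));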
+ +Signed-off-by: Hari Bathini +Reviewed-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20211012123056.485795-5-hbathini@linux.ibm.com +Acked-by: Shung-Hsi Yu +--- + arch/powerpc/include/asm/ppc-opcode.h | 2 ++ + arch/powerpc/net/bpf_jit.h | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/powerpc/include/asm/ppc-opcode.h ++++ b/arch/powerpc/include/asm/ppc-opcode.h +@@ -566,6 +566,8 @@ + #define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr)) + #define PPC_RAW_EIEIO() (0x7c0006ac) + ++#define PPC_RAW_BRANCH(addr) (PPC_INST_BRANCH | ((addr) & 0x03fffffc)) ++ + /* Deal with instructions that older assemblers aren't aware of */ + #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) + #define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT) +--- a/arch/powerpc/net/bpf_jit.h ++++ b/arch/powerpc/net/bpf_jit.h +@@ -31,7 +31,7 @@ + pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ + return -ERANGE; \ + } \ +- EMIT(PPC_INST_BRANCH | (offset & 0x03fffffc)); \ ++ EMIT(PPC_RAW_BRANCH(offset)); \ + } while (0) + + /* blr; (unconditional 'branch' with link) to absolute address */ diff --git a/patches.suse/powerpc-pseries-vas-Declare-pseries_vas_fault_thread.patch b/patches.suse/powerpc-pseries-vas-Declare-pseries_vas_fault_thread.patch new file mode 100644 index 0000000..54e3b74 --- /dev/null +++ b/patches.suse/powerpc-pseries-vas-Declare-pseries_vas_fault_thread.patch @@ -0,0 +1,40 @@ +From 4cb266074aa17e9cafed3a92e9f43b161516569f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Thu, 19 Aug 2021 14:56:52 +0200 +Subject: [PATCH] powerpc/pseries/vas: Declare pseries_vas_fault_thread_fn() as + static +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +References: bsc#1194869 +Patch-mainline: v5.15-rc1 +Git-commit: 4cb266074aa17e9cafed3a92e9f43b161516569f + +This fixes a compile error with W=1. + +Fixes: 6d0aaf5e0de0 ("powerpc/pseries/vas: Setup IRQ and fault handling") +Signed-off-by: Cédric Le Goater +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20210819125656.14498-3-clg@kaod.org +Acked-by: Michal Suchanek +--- + arch/powerpc/platforms/pseries/vas.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c +index b5c1cf1bc64d..b043e3936d21 100644 +--- a/arch/powerpc/platforms/pseries/vas.c ++++ b/arch/powerpc/platforms/pseries/vas.c +@@ -184,7 +184,7 @@ static int h_get_nx_fault(u32 winid, u64 buffer) + * Note: The hypervisor forwards an interrupt for each fault request. + * So one fault CRB to process for each H_GET_NX_FAULT hcall. + */ +-irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) ++static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) + { + struct pseries_vas_window *txwin = data; + struct coprocessor_request_block crb; +-- +2.35.3 + diff --git a/patches.suse/powerpc32-bpf-Fix-codegen-for-bpf-to-bpf-calls.patch b/patches.suse/powerpc32-bpf-Fix-codegen-for-bpf-to-bpf-calls.patch new file mode 100644 index 0000000..d0bb68c --- /dev/null +++ b/patches.suse/powerpc32-bpf-Fix-codegen-for-bpf-to-bpf-calls.patch @@ -0,0 +1,34 @@ +From: "Naveen N. 
Rao" +Date: Thu, 6 Jan 2022 17:15:06 +0530 +Subject: powerpc32/bpf: Fix codegen for bpf-to-bpf calls +Patch-mainline: v5.17-rc2 +Git-commit: fab07611fb2e6a15fac05c4583045ca5582fd826 +References: jsc#PED-1368 + +Pad instructions emitted for BPF_CALL so that the number of instructions +generated does not change for different function addresses. This is +especially important for calls to other bpf functions, whose address +will only be known during extra pass. + +Fixes: 51c66ad849a703 ("powerpc/bpf: Implement extended BPF on PPC32") +Cc: stable@vger.kernel.org # v5.13+ +Signed-off-by: Naveen N. Rao +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/52d8fe51f7620a6f27f377791564d79d75463576.1641468127.git.naveen.n.rao@linux.vnet.ibm.com +Acked-by: Shung-Hsi Yu +--- + arch/powerpc/net/bpf_jit_comp32.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/powerpc/net/bpf_jit_comp32.c ++++ b/arch/powerpc/net/bpf_jit_comp32.c +@@ -191,6 +191,9 @@ void bpf_jit_emit_func_call_rel(u32 *ima + + if (image && rel < 0x2000000 && rel >= -0x2000000) { + PPC_BL_ABS(func); ++ EMIT(PPC_RAW_NOP()); ++ EMIT(PPC_RAW_NOP()); ++ EMIT(PPC_RAW_NOP()); + } else { + /* Load function address into r0 */ + EMIT(PPC_RAW_LIS(_R0, IMM_H(func))); diff --git a/patches.suse/powerpc64-bpf-Limit-ldbrx-to-processors-compliant-wi.patch b/patches.suse/powerpc64-bpf-Limit-ldbrx-to-processors-compliant-wi.patch new file mode 100644 index 0000000..79c54f7 --- /dev/null +++ b/patches.suse/powerpc64-bpf-Limit-ldbrx-to-processors-compliant-wi.patch @@ -0,0 +1,95 @@ +From: "Naveen N. Rao" +Date: Thu, 6 Jan 2022 17:15:12 +0530 +Subject: powerpc64/bpf: Limit 'ldbrx' to processors compliant with ISA v2.06 +Patch-mainline: v5.17-rc2 +Git-commit: 3f5f766d5f7f95a69a630da3544a1a0cee1cdddf +References: jsc#PED-1368 + +Johan reported the below crash with test_bpf on ppc64 e5500: + + test_bpf: #296 ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301 jited:1 + Oops: Exception in kernel mode, sig: 4 [#1] + BE PAGE_SIZE=4K SMP NR_CPUS=24 QEMU e500 + Modules linked in: test_bpf(+) + CPU: 0 PID: 76 Comm: insmod Not tainted 5.14.0-03771-g98c2059e008a-dirty #1 + NIP: 8000000000061c3c LR: 80000000006dea64 CTR: 8000000000061c18 + REGS: c0000000032d3420 TRAP: 0700 Not tainted (5.14.0-03771-g98c2059e008a-dirty) + MSR: 0000000080089000 CR: 88002822 XER: 20000000 IRQMASK: 0 + <...> + NIP [8000000000061c3c] 0x8000000000061c3c + LR [80000000006dea64] .__run_one+0x104/0x17c [test_bpf] + Call Trace: + .__run_one+0x60/0x17c [test_bpf] (unreliable) + .test_bpf_init+0x6a8/0xdc8 [test_bpf] + .do_one_initcall+0x6c/0x28c + .do_init_module+0x68/0x28c + .load_module+0x2460/0x2abc + .__do_sys_init_module+0x120/0x18c + .system_call_exception+0x110/0x1b8 + system_call_common+0xf0/0x210 + --- interrupt: c00 at 0x101d0acc + <...> + ---[ end trace 47b2bf19090bb3d0 ]--- + + Illegal instruction + +The illegal instruction turned out to be 'ldbrx' emitted for +BPF_FROM_[L|B]E, which was only introduced in ISA v2.06. Guard use of +the same and implement an alternative approach for older processors. + +Fixes: 156d0e290e969c ("powerpc/ebpf/jit: Implement JIT compiler for extended BPF") +Reported-by: Johan Almbladh +Signed-off-by: Naveen N. 
Rao +Tested-by: Johan Almbladh +Acked-by: Johan Almbladh +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/d1e51c6fdf572062cf3009a751c3406bda01b832.1641468127.git.naveen.n.rao@linux.vnet.ibm.com +Acked-by: Shung-Hsi Yu +--- + arch/powerpc/include/asm/ppc-opcode.h | 1 + + arch/powerpc/net/bpf_jit_comp64.c | 22 +++++++++++++--------- + 2 files changed, 14 insertions(+), 9 deletions(-) + +--- a/arch/powerpc/include/asm/ppc-opcode.h ++++ b/arch/powerpc/include/asm/ppc-opcode.h +@@ -498,6 +498,7 @@ + #define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) + #define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) + #define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) ++#define PPC_RAW_LWBRX(r, base, b) (0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) + #define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) + #define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) + #define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i)) +--- a/arch/powerpc/net/bpf_jit_comp64.c ++++ b/arch/powerpc/net/bpf_jit_comp64.c +@@ -634,17 +634,21 @@ bpf_alu32_trunc: + EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); + break; + case 64: +- /* +- * Way easier and faster(?) to store the value +- * into stack and then use ldbrx +- * +- * ctx->seen will be reliable in pass2, but +- * the instructions generated will remain the +- * same across all passes +- */ ++ /* Store the value to stack and then use byte-reverse loads */ + PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); +- EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); ++ if (cpu_has_feature(CPU_FTR_ARCH_206)) { ++ EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); ++ } else { ++ EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1])); ++ if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) ++ EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32)); ++ EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4)); ++ EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1])); ++ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) ++ EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32)); ++ EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2])); ++ } + break; + } + break; diff --git a/patches.suse/pstore-blk-use-bdev_nr_bytes-instead-of-open-coding-.patch b/patches.suse/pstore-blk-use-bdev_nr_bytes-instead-of-open-coding-.patch new file mode 100644 index 0000000..6b270d9 --- /dev/null +++ b/patches.suse/pstore-blk-use-bdev_nr_bytes-instead-of-open-coding-.patch @@ -0,0 +1,51 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:21 +0200 +Subject: [PATCH] pstore/blk: use bdev_nr_bytes instead of open coding it +Git-commit: 4646198519c9aaa1c307ec4750ac64e08507d936 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. 
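+
+Conceptually the conversion is (a sketch only; "bdev" stands in for the
+block device resolved from the opened file, the hunk below is the real
+change):
+
+	/* before: open coded through the bdev inode */
+	dev->zone.total_size = i_size_read(bdev->bd_inode);
+	/* after: dedicated helper */
+	dev->zone.total_size = bdev_nr_bytes(bdev);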
+ +Signed-off-by: Christoph Hellwig +Acked-by: Kees Cook +Link: https://lore.kernel.org/r/20211018101130.1838532-22-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/pstore/blk.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c +index 04ce58c939a0..5d1fbaffd66a 100644 +--- a/fs/pstore/blk.c ++++ b/fs/pstore/blk.c +@@ -205,7 +205,6 @@ static ssize_t psblk_generic_blk_write(const char *buf, size_t bytes, + static int __register_pstore_blk(struct pstore_device_info *dev, + const char *devpath) + { +- struct inode *inode; + int ret = -ENODEV; + + lockdep_assert_held(&pstore_blk_lock); +@@ -217,14 +216,13 @@ static int __register_pstore_blk(struct pstore_device_info *dev, + goto err; + } + +- inode = file_inode(psblk_file); +- if (!S_ISBLK(inode->i_mode)) { ++ if (!S_ISBLK(file_inode(psblk_file)->i_mode)) { + pr_err("'%s' is not block device!\n", devpath); + goto err_fput; + } + +- inode = I_BDEV(psblk_file->f_mapping->host)->bd_inode; +- dev->zone.total_size = i_size_read(inode); ++ dev->zone.total_size = ++ bdev_nr_bytes(I_BDEV(psblk_file->f_mapping->host)); + + ret = __register_pstore_device(dev); + if (ret) +-- +2.35.3 + diff --git a/patches.suse/rbd-add-add_disk-error-handling.patch b/patches.suse/rbd-add-add_disk-error-handling.patch new file mode 100644 index 0000000..4d74e66 --- /dev/null +++ b/patches.suse/rbd-add-add_disk-error-handling.patch @@ -0,0 +1,45 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:02:28 -0700 +Subject: [PATCH] rbd: add add_disk() error handling +Git-commit: 27c97abc30e2b9ad2288977c0ecbef4d50553f57 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. + +Signed-off-by: Luis Chamberlain +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/rbd.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c +index bf60aebd0cfb..953fa134cd3d 100644 +--- a/drivers/block/rbd.c ++++ b/drivers/block/rbd.c +@@ -7054,7 +7054,9 @@ static ssize_t do_rbd_add(struct bus_type *bus, + if (rc) + goto err_out_image_lock; + +- device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL); ++ rc = device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL); ++ if (rc) ++ goto err_out_cleanup_disk; + + spin_lock(&rbd_dev_list_lock); + list_add_tail(&rbd_dev->node, &rbd_dev_list); +@@ -7068,6 +7070,8 @@ static ssize_t do_rbd_add(struct bus_type *bus, + module_put(THIS_MODULE); + return rc; + ++err_out_cleanup_disk: ++ rbd_free_disk(rbd_dev); + err_out_image_lock: + rbd_dev_image_unlock(rbd_dev); + rbd_dev_device_release(rbd_dev); +-- +2.35.3 + diff --git a/patches.suse/reiserfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch b/patches.suse/reiserfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch new file mode 100644 index 0000000..c1d653f --- /dev/null +++ b/patches.suse/reiserfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch @@ -0,0 +1,39 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:22 +0200 +Subject: [PATCH] reiserfs: use bdev_nr_bytes instead of open coding it +Git-commit: 1d5dd3b9164c77816a679d39dbd28787d3f22a3a +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size and remove two +cargo culted checks that can't be false. 
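+
+For context, the helper itself is a trivial wrapper (as introduced
+earlier in this series; shown here only for illustration):
+
+	static inline loff_t bdev_nr_bytes(struct block_device *bdev)
+	{
+		return i_size_read(bdev->bd_inode);
+	}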
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Reviewed-by: Jan Kara +Reviewed-by: Chaitanya Kulkarni +Link: https://lore.kernel.org/r/20211018101130.1838532-23-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/reiserfs/super.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c +index 58481f8d63d5..8647a00434ea 100644 +--- a/fs/reiserfs/super.c ++++ b/fs/reiserfs/super.c +@@ -1986,9 +1986,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) + * smaller than the filesystem. If the check fails then abort and + * scream, because bad stuff will happen otherwise. + */ +- if (s->s_bdev && s->s_bdev->bd_inode +- && i_size_read(s->s_bdev->bd_inode) < +- sb_block_count(rs) * sb_blocksize(rs)) { ++ if (bdev_nr_bytes(s->s_bdev) < sb_block_count(rs) * sb_blocksize(rs)) { + SWARN(silent, s, "", "Filesystem cannot be " + "mounted because it is bigger than the device"); + SWARN(silent, s, "", "You may need to run fsck " +-- +2.35.3 + diff --git a/patches.suse/reiserfs-use-sb_bdev_nr_blocks.patch b/patches.suse/reiserfs-use-sb_bdev_nr_blocks.patch new file mode 100644 index 0000000..f0472f7 --- /dev/null +++ b/patches.suse/reiserfs-use-sb_bdev_nr_blocks.patch @@ -0,0 +1,37 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:29 +0200 +Subject: [PATCH] reiserfs: use sb_bdev_nr_blocks +Git-commit: 2ffae493dc15ee2225bd68254e1cdb1449b81ec7 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the sb_bdev_nr_blocks helper instead of open coding it. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20211018101130.1838532-30-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/reiserfs/super.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c +index 8647a00434ea..076f9ab94306 100644 +--- a/fs/reiserfs/super.c ++++ b/fs/reiserfs/super.c +@@ -1199,9 +1199,7 @@ static int reiserfs_parse_options(struct super_block *s, + + if (!strcmp(arg, "auto")) { + /* From JFS code, to auto-get the size. */ +- *blocks = +- i_size_read(s->s_bdev->bd_inode) >> s-> +- s_blocksize_bits; ++ *blocks = sb_bdev_nr_blocks(s); + } else { + *blocks = simple_strtoul(arg, &p, 0); + if (*p != '\0') { +-- +2.35.3 + diff --git a/patches.suse/ring-buffer-Include-dropped-pages-in-counting-dirty-patches.patch b/patches.suse/ring-buffer-Include-dropped-pages-in-counting-dirty-patches.patch new file mode 100644 index 0000000..e4c0bda --- /dev/null +++ b/patches.suse/ring-buffer-Include-dropped-pages-in-counting-dirty-patches.patch @@ -0,0 +1,91 @@ +From: "Steven Rostedt (Google)" +Date: Fri, 21 Oct 2022 12:30:13 -0400 +Subject: ring-buffer: Include dropped pages in counting dirty patches +Git-commit: 31029a8b2c7e656a0289194ef16415050ae4c4ac +Patch-mainline: v6.1-rc6 +References: git-fixes + +The function ring_buffer_nr_dirty_pages() was created to find out how many +pages are filled in the ring buffer. There's two running counters. One is +incremented whenever a new page is touched (pages_touched) and the other +is whenever a page is read (pages_read). The dirty count is the number +touched minus the number read. This is used to determine if a blocked task +should be woken up if the percentage of the ring buffer it is waiting for +is hit. 
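+
+In rough pseudo-C, the accounting described above is (a conceptual
+sketch; the names are illustrative, not the exact code):
+
+	dirty = pages_touched - pages_read;
+	/* wake the waiter once enough of the buffer is dirty */
+	if (dirty >= nr_pages * buffer_percent / 100)
+		wake_up_readers();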
+
+The problem is that it does not take into account dropped pages (when the
+new writes overwrite pages that were not read). The dirty page count will
+then always be greater than the percentage.
+
+This makes the "buffer_percent" file inaccurate, as the number of dirty
+pages ends up always being larger than the percentage, even when it is
+not, and this causes user space to be woken up more than it wants to be.
+
+Add a new counter to keep track of lost pages, and include that in the
+accounting of dirty pages so that it is actually accurate.
+
+Link: https://lkml.kernel.org/r/20221021123013.55fb6055@gandalf.local.home
+
+Fixes: 2c2b0a78b3739 ("ring-buffer: Add percentage of ring buffer full to wake up reader")
+Signed-off-by: Steven Rostedt (Google)
+Acked-by: Petr Pavlu
+---
+ kernel/trace/ring_buffer.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
+index 089b1ec9cb3b..a19369c4d8df 100644
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -519,6 +519,7 @@ struct ring_buffer_per_cpu {
+ 	local_t				committing;
+ 	local_t				commits;
+ 	local_t				pages_touched;
++	local_t				pages_lost;
+ 	local_t				pages_read;
+ 	long				last_pages_touch;
+ 	size_t				shortest_full;
+@@ -894,10 +895,18 @@ size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu)
+ size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu)
+ {
+ 	size_t read;
++	size_t lost;
+ 	size_t cnt;
+ 
+ 	read = local_read(&buffer->buffers[cpu]->pages_read);
++	lost = local_read(&buffer->buffers[cpu]->pages_lost);
+ 	cnt = local_read(&buffer->buffers[cpu]->pages_touched);
++
++	if (WARN_ON_ONCE(cnt < lost))
++		return 0;
++
++	cnt -= lost;
++
+ 	/* The reader can read an empty page, but not more than that */
+ 	if (cnt < read) {
+ 		WARN_ON_ONCE(read > cnt + 1);
+@@ -2031,6 +2040,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
+ 		 */
+ 		local_add(page_entries, &cpu_buffer->overrun);
+ 		local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
++		local_inc(&cpu_buffer->pages_lost);
+ 	}
+ 
+ 	/*
+@@ -2515,6 +2525,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
+ 		 */
+ 		local_add(entries, &cpu_buffer->overrun);
+ 		local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
++		local_inc(&cpu_buffer->pages_lost);
+ 
+ 		/*
+ 		 * The entries will be zeroed out when we move the
+@@ -5265,6 +5276,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
+ 	local_set(&cpu_buffer->committing, 0);
+ 	local_set(&cpu_buffer->commits, 0);
+ 	local_set(&cpu_buffer->pages_touched, 0);
++	local_set(&cpu_buffer->pages_lost, 0);
+ 	local_set(&cpu_buffer->pages_read, 0);
+ 	cpu_buffer->last_pages_touch = 0;
+ 	cpu_buffer->shortest_full = 0;
+ 
diff --git a/patches.suse/ring_buffer-Do-not-deactivate-non-existant-pages.patch b/patches.suse/ring_buffer-Do-not-deactivate-non-existant-pages.patch
new file mode 100644
index 0000000..4f50500
--- /dev/null
+++ b/patches.suse/ring_buffer-Do-not-deactivate-non-existant-pages.patch
@@ -0,0 +1,41 @@
+From: Daniil Tatianin
+Date: Mon, 14 Nov 2022 17:31:29 +0300
+Subject: ring_buffer: Do not deactivate non-existant pages
+Git-commit: 56f4ca0a79a9f1af98f26c54b9b89ba1f9bcc6bd
+Patch-mainline: v6.1-rc6
+References: git-fixes
+
+rb_head_page_deactivate() expects cpu_buffer to contain a valid list of
+->pages, so verify that the list is actually present before calling it.
+
+Found by Linux Verification Center (linuxtesting.org) with the SVACE
+static analysis tool.
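+
+Sketching the hazard (simplified, not the literal code): the tear-down
+path used to do
+
+	rb_head_page_deactivate(cpu_buffer);	/* walks cpu_buffer->pages */
+	if (head) {
+		/* ... free the page list ... */
+	}
+
+so the list was walked before the check for its existence; the fix below
+moves the call under the check.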
+
+Link: https://lkml.kernel.org/r/20221114143129.3534443-1-d-tatianin@yandex-team.ru
+
+Cc: stable@vger.kernel.org
+Fixes: 77ae365eca895 ("ring-buffer: make lockless")
+Signed-off-by: Daniil Tatianin
+Signed-off-by: Steven Rostedt (Google)
+Acked-by: Petr Pavlu
+---
+ kernel/trace/ring_buffer.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
+index a19369c4d8df..b21bf14bae9b 100644
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -1802,9 +1802,9 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
+ 
+ 	free_buffer_page(cpu_buffer->reader_page);
+ 
+-	rb_head_page_deactivate(cpu_buffer);
+-
+ 	if (head) {
++		rb_head_page_deactivate(cpu_buffer);
++
+ 		list_for_each_entry_safe(bpage, tmp, head, list) {
+ 			list_del_init(&bpage->list);
+ 			free_buffer_page(bpage);
+ 
diff --git a/patches.suse/s390-block-xpram-include-major-h.patch b/patches.suse/s390-block-xpram-include-major-h.patch
deleted file mode 100644
index 3f0f7a9..0000000
--- a/patches.suse/s390-block-xpram-include-major-h.patch
+++ /dev/null
@@ -1,22 +0,0 @@
-From: Hannes Reinecke
-Subject: s390: block/xpram include <linux/major.h>
-Patch-Mainline: never, SLE15-SP5 specific
-References: bsc#1205381
-
-We have not removed the XPRAM driver as per commit
-68c32eb2707a ("s390: remove xpram device driver"), so we need to fix
-up includes to make the driver compile again.
-
-Signed-off-by: Hannes Reinecke
-
-diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
---- a/drivers/s390/block/xpram.c
-+++ b/drivers/s390/block/xpram.c
-@@ -41,6 +41,7 @@
- #include
- #include
- #include
-+#include <linux/major.h>
- 
- #define XPRAM_NAME "xpram"
- #define XPRAM_DEVS 1	/* one partition */
diff --git a/patches.suse/s390-pci-fix-clp_get_state-handling-of-ENODEV b/patches.suse/s390-pci-fix-clp_get_state-handling-of-ENODEV
new file mode 100644
index 0000000..43246c5
--- /dev/null
+++ b/patches.suse/s390-pci-fix-clp_get_state-handling-of-ENODEV
@@ -0,0 +1,51 @@
+From: Niklas Schnelle
+Date: Fri, 3 Sep 2021 18:27:01 +0200
+Subject: s390/pci: fix clp_get_state() handling of -ENODEV
+Git-commit: ebd9cc6593691e6bc8526e368cedbdfc8034f403
+Patch-mainline: v5.15-rc1
+References: jsc#PED-592
+
+With commit cc049eecfb7a ("s390/pci: simplify CLP List PCI handling")
+clp_get_state() was changed to make use of the new clp_find_pci() helper
+function to query a specific function. This however returns -ENODEV when
+the device is not found at all, and this error was passed on to the
+caller. It was missed however that the callers actually expect a success
+return from clp_get_state() if the device is gone.
+
+Fix this by handling the -ENODEV return of clp_find_pci() explicitly in
+clp_get_state(), returning success and setting the state parameter to
+ZPCI_FN_STATE_RESERVED, matching the design concept that a PCI function
+that disappeared must have been reserved elsewhere. For all other error
+returns continue to just pass them on to the caller.
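+
+To illustrate the caller contract (a hypothetical caller, for exposition
+only):
+
+	enum zpci_state state;
+
+	if (clp_get_state(fid, &state))
+		return;		/* -ENODEV would previously bail out here */
+	if (state == ZPCI_FN_STATE_RESERVED)
+		zpci_device_reserved(zdev);	/* hypothetical follow-up */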
+ +Reviewed-by: Matthew Rosato +Fixes: cc049eecfb7a ("s390/pci: simplify CLP List PCI handling") +Signed-off-by: Niklas Schnelle +Signed-off-by: Heiko Carstens +Acked-by: Petr Tesarik +--- + arch/s390/pci/pci_clp.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/arch/s390/pci/pci_clp.c ++++ b/arch/s390/pci/pci_clp.c +@@ -449,14 +449,17 @@ int clp_get_state(u32 fid, enum zpci_sta + struct clp_fh_list_entry entry; + int rc; + +- *state = ZPCI_FN_STATE_RESERVED; + rrb = clp_alloc_block(GFP_ATOMIC); + if (!rrb) + return -ENOMEM; + + rc = clp_find_pci(rrb, fid, &entry); +- if (!rc) ++ if (!rc) { + *state = entry.config_state; ++ } else if (rc == -ENODEV) { ++ *state = ZPCI_FN_STATE_RESERVED; ++ rc = 0; ++ } + + clp_free_block(rrb); + return rc; diff --git a/patches.suse/s390-pci-handle-FH-state-mismatch-only-on-disable b/patches.suse/s390-pci-handle-FH-state-mismatch-only-on-disable new file mode 100644 index 0000000..124f272 --- /dev/null +++ b/patches.suse/s390-pci-handle-FH-state-mismatch-only-on-disable @@ -0,0 +1,99 @@ +From: Niklas Schnelle +Date: Thu, 22 Jul 2021 12:38:29 +0200 +Subject: s390/pci: handle FH state mismatch only on disable +Git-commit: 8256adda1f44ea1ec763711aefcd25f8c0cf93f3 +Patch-mainline: v5.15-rc1 +References: jsc#PED-592 + +Instead of always treating CLP_RC_SETPCIFN_ALRDY as success and blindly +updating the function handle restrict this special handling to the +disable case by moving it into zpci_disable_device() and still treating +it as an error while also updating the function handle such that +a subsequent zpci_disable_device() succeeds or the caller can ignore the +error when aborting is not an option such as for zPCI event 0x304. +Also print this occurrence to the log such that an admin can tell why +a disable operation returned an error. + +A mismatch between the state of the underlying device and our view of it +can naturally happen when the device suddenly enters the error state but +we haven't gotten the error notification yet, it must not happen on +enable though. + +Reviewed-by: Matthew Rosato +Signed-off-by: Niklas Schnelle +Signed-off-by: Heiko Carstens +Acked-by: Petr Tesarik +--- + arch/s390/include/asm/pci.h | 1 + + arch/s390/pci/pci.c | 15 ++++++++++++++- + arch/s390/pci/pci_clp.c | 6 +----- + 3 files changed, 16 insertions(+), 6 deletions(-) + +--- a/arch/s390/include/asm/pci.h ++++ b/arch/s390/include/asm/pci.h +@@ -219,6 +219,7 @@ int clp_query_pci_fn(struct zpci_dev *zd + int clp_enable_fh(struct zpci_dev *, u8); + int clp_disable_fh(struct zpci_dev *); + int clp_get_state(u32 fid, enum zpci_state *state); ++int clp_refresh_fh(u32 fid); + + /* UID */ + void update_uid_checking(bool new); +--- a/arch/s390/pci/pci.c ++++ b/arch/s390/pci/pci.c +@@ -680,12 +680,25 @@ out: + + int zpci_disable_device(struct zpci_dev *zdev) + { ++ int cc, rc = 0; ++ + zpci_dma_exit_device(zdev); + /* + * The zPCI function may already be disabled by the platform, this is + * detected in clp_disable_fh() which becomes a no-op. + */ +- return clp_disable_fh(zdev) ? 
-EIO : 0;
++	cc = clp_disable_fh(zdev);
++	if (cc == CLP_RC_SETPCIFN_ALRDY) {
++		pr_info("Disabling PCI function %08x had no effect as it was already disabled\n",
++			zdev->fid);
++		/* Function is already disabled - update handle */
++		rc = clp_refresh_fh(zdev->fid);
++		if (!rc)
++			rc = -EINVAL;
++	} else if (cc) {
++		rc = -EIO;
++	}
++	return rc;
+ }
+ 
+ /**
+--- a/arch/s390/pci/pci_clp.c
++++ b/arch/s390/pci/pci_clp.c
+@@ -212,7 +212,6 @@ out:
+ 	return rc;
+ }
+ 
+-static int clp_refresh_fh(u32 fid);
+ /**
+  * clp_set_pci_fn() - Execute a command on a PCI function
+  * @zdev: Function that will be affected
+@@ -251,9 +250,6 @@ static int clp_set_pci_fn(struct zpci_de
+ 
+ 	if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
+ 		zdev->fh = rrb->response.fh;
+-	} else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY) {
+-		/* Function is already in desired state - update handle */
+-		rc = clp_refresh_fh(zdev->fid);
+ 	} else {
+ 		zpci_err("Set PCI FN:\n");
+ 		zpci_err_clp(rrb->response.hdr.rsp, rc);
+@@ -409,7 +405,7 @@ static void __clp_refresh_fh(struct clp_
+ /*
+  * Refresh the function handle of the function matching @fid
+  */
+-static int clp_refresh_fh(u32 fid)
++int clp_refresh_fh(u32 fid)
+ {
+ 	struct clp_req_rsp_list_pci *rrb;
+ 	int rc;
diff --git a/patches.suse/s390-pci-implement-minimal-PCI-error-recovery b/patches.suse/s390-pci-implement-minimal-PCI-error-recovery
new file mode 100644
index 0000000..967da72
--- /dev/null
+++ b/patches.suse/s390-pci-implement-minimal-PCI-error-recovery
@@ -0,0 +1,372 @@
+From: Niklas Schnelle
+Date: Wed, 7 Jul 2021 11:00:01 +0200
+Subject: s390/pci: implement minimal PCI error recovery
+Git-commit: 4cdf2f4e24ff0d345fc36ef6d6aec059333a261e
+Patch-mainline: v5.16-rc1
+References: jsc#PED-592
+
+When the platform detects an error on a PCI function or a service action
+has been performed, it is put in the error state and an error event
+notification is provided to the OS.
+
+Currently we treat all error event notifications the same and simply set
+pdev->error_state = pci_channel_io_perm_failure, requiring user
+intervention such as use of the recover attribute to get the device
+usable again. Despite requiring a manual step this also has the
+disadvantage that the device is completely torn down and recreated,
+resulting in higher level devices such as a block or network device
+being recreated. In case of a block device this also means that it may
+need to be removed from and added to a software RAID, even if that could
+otherwise survive with a temporary degradation.
+
+This is of course not ideal, even more so since an error notification
+with PEC 0x3A indicates that the platform already performed error
+recovery successfully, or that the error state was caused by a service
+action that is now finished.
+
+At least in this case we can assume that the error state can be reset
+and the function made usable again. So as not to have the disadvantage
+of a full tear down and recreation, we need to coordinate this recovery
+with the driver. Thankfully there is already a well-defined recovery
+flow for this, described in Documentation/PCI/pci-error-recovery.rst.
+
+The implementation of this is somewhat straightforward and simplified
+by the fact that our recovery flow is defined per PCI function. As
+a reset we use the newly introduced zpci_hot_reset_device() which also
+takes the PCI function out of the error state.
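+
+Condensed, the per-function flow follows the documented scheme (a sketch
+of the handler sequence with the error paths omitted):
+
+	ers_res = driver->err_handler->error_detected(pdev, pdev->error_state);
+	if (ers_res == PCI_ERS_RESULT_CAN_RECOVER)
+		ers_res = zpci_event_do_error_state_clear(pdev, driver);
+	if (ers_res == PCI_ERS_RESULT_NEED_RESET)
+		ers_res = zpci_event_do_reset(pdev, driver);
+	if (ers_res == PCI_ERS_RESULT_RECOVERED)
+		driver->err_handler->resume(pdev);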
+ +Reviewed-by: Pierre Morel +Acked-by: Matthew Rosato +Signed-off-by: Niklas Schnelle +Signed-off-by: Vasily Gorbik +Acked-by: Petr Tesarik +--- + arch/s390/include/asm/pci.h | 4 + arch/s390/pci/pci.c | 53 ++++++++++ + arch/s390/pci/pci_event.c | 224 +++++++++++++++++++++++++++++++++++++++++++- + 3 files changed, 277 insertions(+), 4 deletions(-) + +--- a/arch/s390/include/asm/pci.h ++++ b/arch/s390/include/asm/pci.h +@@ -296,8 +296,10 @@ void zpci_debug_exit(void); + void zpci_debug_init_device(struct zpci_dev *, const char *); + void zpci_debug_exit_device(struct zpci_dev *); + +-/* Error reporting */ ++/* Error handling */ + int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *); ++int zpci_clear_error_state(struct zpci_dev *zdev); ++int zpci_reset_load_store_blocked(struct zpci_dev *zdev); + + #ifdef CONFIG_NUMA + +--- a/arch/s390/pci/pci.c ++++ b/arch/s390/pci/pci.c +@@ -990,6 +990,59 @@ int zpci_report_error(struct pci_dev *pd + } + EXPORT_SYMBOL(zpci_report_error); + ++/** ++ * zpci_clear_error_state() - Clears the zPCI error state of the device ++ * @zdev: The zdev for which the zPCI error state should be reset ++ * ++ * Clear the zPCI error state of the device. If clearing the zPCI error state ++ * fails the device is left in the error state. In this case it may make sense ++ * to call zpci_io_perm_failure() on the associated pdev if it exists. ++ * ++ * Returns: 0 on success, -EIO otherwise ++ */ ++int zpci_clear_error_state(struct zpci_dev *zdev) ++{ ++ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_ERROR); ++ struct zpci_fib fib = {0}; ++ u8 status; ++ int cc; ++ ++ cc = zpci_mod_fc(req, &fib, &status); ++ if (cc) { ++ zpci_dbg(3, "ces fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status); ++ return -EIO; ++ } ++ ++ return 0; ++} ++ ++/** ++ * zpci_reset_load_store_blocked() - Re-enables L/S from error state ++ * @zdev: The zdev for which to unblock load/store access ++ * ++ * Re-enables load/store access for a PCI function in the error state while ++ * keeping DMA blocked. In this state drivers can poke MMIO space to determine ++ * if error recovery is possible while catching any rogue DMA access from the ++ * device. 
++ * ++ * Returns: 0 on success, -EIO otherwise ++ */ ++int zpci_reset_load_store_blocked(struct zpci_dev *zdev) ++{ ++ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_BLOCK); ++ struct zpci_fib fib = {0}; ++ u8 status; ++ int cc; ++ ++ cc = zpci_mod_fc(req, &fib, &status); ++ if (cc) { ++ zpci_dbg(3, "rls fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status); ++ return -EIO; ++ } ++ ++ return 0; ++} ++ + static int zpci_mem_init(void) + { + BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) || +--- a/arch/s390/pci/pci_event.c ++++ b/arch/s390/pci/pci_event.c +@@ -47,16 +47,221 @@ struct zpci_ccdf_avail { + u16 pec; /* PCI event code */ + } __packed; + ++static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) ++{ ++ switch (ers_res) { ++ case PCI_ERS_RESULT_CAN_RECOVER: ++ case PCI_ERS_RESULT_RECOVERED: ++ case PCI_ERS_RESULT_NEED_RESET: ++ return false; ++ default: ++ return true; ++ } ++} ++ ++static bool is_passed_through(struct zpci_dev *zdev) ++{ ++ return zdev->s390_domain; ++} ++ ++static bool is_driver_supported(struct pci_driver *driver) ++{ ++ if (!driver || !driver->err_handler) ++ return false; ++ if (!driver->err_handler->error_detected) ++ return false; ++ if (!driver->err_handler->slot_reset) ++ return false; ++ if (!driver->err_handler->resume) ++ return false; ++ return true; ++} ++ ++static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev, ++ struct pci_driver *driver) ++{ ++ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; ++ ++ ers_res = driver->err_handler->error_detected(pdev, pdev->error_state); ++ if (ers_result_indicates_abort(ers_res)) ++ pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev)); ++ else if (ers_res == PCI_ERS_RESULT_NEED_RESET) ++ pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); ++ ++ return ers_res; ++} ++ ++static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev, ++ struct pci_driver *driver) ++{ ++ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; ++ struct zpci_dev *zdev = to_zpci(pdev); ++ int rc; ++ ++ pr_info("%s: Unblocking device access for examination\n", pci_name(pdev)); ++ rc = zpci_reset_load_store_blocked(zdev); ++ if (rc) { ++ pr_err("%s: Unblocking device access failed\n", pci_name(pdev)); ++ /* Let's try a full reset instead */ ++ return PCI_ERS_RESULT_NEED_RESET; ++ } ++ ++ if (driver->err_handler->mmio_enabled) { ++ ers_res = driver->err_handler->mmio_enabled(pdev); ++ if (ers_result_indicates_abort(ers_res)) { ++ pr_info("%s: Automatic recovery failed after MMIO re-enable\n", ++ pci_name(pdev)); ++ return ers_res; ++ } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) { ++ pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev)); ++ return ers_res; ++ } ++ } ++ ++ pr_debug("%s: Unblocking DMA\n", pci_name(pdev)); ++ rc = zpci_clear_error_state(zdev); ++ if (!rc) { ++ pdev->error_state = pci_channel_io_normal; ++ } else { ++ pr_err("%s: Unblocking DMA failed\n", pci_name(pdev)); ++ /* Let's try a full reset instead */ ++ return PCI_ERS_RESULT_NEED_RESET; ++ } ++ ++ return ers_res; ++} ++ ++static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, ++ struct pci_driver *driver) ++{ ++ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; ++ ++ pr_info("%s: Initiating reset\n", pci_name(pdev)); ++ if (zpci_hot_reset_device(to_zpci(pdev))) { ++ pr_err("%s: The reset request failed\n", pci_name(pdev)); ++ return ers_res; ++ } ++ pdev->error_state = pci_channel_io_normal; ++ ers_res = 
driver->err_handler->slot_reset(pdev); ++ if (ers_result_indicates_abort(ers_res)) { ++ pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev)); ++ return ers_res; ++ } ++ ++ return ers_res; ++} ++ ++/* zpci_event_attempt_error_recovery - Try to recover the given PCI function ++ * @pdev: PCI function to recover currently in the error state ++ * ++ * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst. ++ * With the simplification that recovery always happens per function ++ * and the platform determines which functions are affected for ++ * multi-function devices. ++ */ ++static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) ++{ ++ pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; ++ struct pci_driver *driver; ++ ++ /* ++ * Ensure that the PCI function is not removed concurrently, no driver ++ * is unbound or probed and that userspace can't access its ++ * configuration space while we perform recovery. ++ */ ++ pci_dev_lock(pdev); ++ if (pdev->error_state == pci_channel_io_perm_failure) { ++ ers_res = PCI_ERS_RESULT_DISCONNECT; ++ goto out_unlock; ++ } ++ pdev->error_state = pci_channel_io_frozen; ++ ++ if (is_passed_through(to_zpci(pdev))) { ++ pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n", ++ pci_name(pdev)); ++ goto out_unlock; ++ } ++ ++ driver = to_pci_driver(pdev->dev.driver); ++ if (!is_driver_supported(driver)) { ++ if (!driver) ++ pr_info("%s: Cannot be recovered because no driver is bound to the device\n", ++ pci_name(pdev)); ++ else ++ pr_info("%s: The %s driver bound to the device does not support error recovery\n", ++ pci_name(pdev), ++ driver->name); ++ goto out_unlock; ++ } ++ ++ ers_res = zpci_event_notify_error_detected(pdev, driver); ++ if (ers_result_indicates_abort(ers_res)) ++ goto out_unlock; ++ ++ if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { ++ ers_res = zpci_event_do_error_state_clear(pdev, driver); ++ if (ers_result_indicates_abort(ers_res)) ++ goto out_unlock; ++ } ++ ++ if (ers_res == PCI_ERS_RESULT_NEED_RESET) ++ ers_res = zpci_event_do_reset(pdev, driver); ++ ++ if (ers_res != PCI_ERS_RESULT_RECOVERED) { ++ pr_err("%s: Automatic recovery failed; operator intervention is required\n", ++ pci_name(pdev)); ++ goto out_unlock; ++ } ++ ++ pr_info("%s: The device is ready to resume operations\n", pci_name(pdev)); ++ if (driver->err_handler->resume) ++ driver->err_handler->resume(pdev); ++out_unlock: ++ pci_dev_unlock(pdev); ++ ++ return ers_res; ++} ++ ++/* zpci_event_io_failure - Report PCI channel failure state to driver ++ * @pdev: PCI function for which to report ++ * @es: PCI channel failure state to report ++ */ ++static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es) ++{ ++ struct pci_driver *driver; ++ ++ pci_dev_lock(pdev); ++ pdev->error_state = es; ++ /** ++ * While vfio-pci's error_detected callback notifies user-space QEMU ++ * reacts to this by freezing the guest. In an s390 environment PCI ++ * errors are rarely fatal so this is overkill. Instead in the future ++ * we will inject the error event and let the guest recover the device ++ * itself. 
++ */ ++ if (is_passed_through(to_zpci(pdev))) ++ goto out; ++ driver = to_pci_driver(pdev->dev.driver); ++ if (driver && driver->err_handler && driver->err_handler->error_detected) ++ driver->err_handler->error_detected(pdev, pdev->error_state); ++out: ++ pci_dev_unlock(pdev); ++} ++ + static void __zpci_event_error(struct zpci_ccdf_err *ccdf) + { + struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); + struct pci_dev *pdev = NULL; ++ pci_ers_result_t ers_res; + + zpci_err("error CCDF:\n"); + zpci_err_hex(ccdf, sizeof(*ccdf)); + +- if (zdev) +- pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); ++ if (zdev) { ++ zpci_update_fh(zdev, ccdf->fh); ++ if (zdev->zbus->bus) ++ pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); ++ } + + pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n", + pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); +@@ -64,7 +269,20 @@ static void __zpci_event_error(struct zp + if (!pdev) + return; + +- pdev->error_state = pci_channel_io_perm_failure; ++ switch (ccdf->pec) { ++ case 0x003a: /* Service Action or Error Recovery Successful */ ++ ers_res = zpci_event_attempt_error_recovery(pdev); ++ if (ers_res != PCI_ERS_RESULT_RECOVERED) ++ zpci_event_io_failure(pdev, pci_channel_io_perm_failure); ++ break; ++ default: ++ /* ++ * Mark as frozen, not permanently failed, because the device ++ * could be subsequently recovered by the platform. ++ */ ++ zpci_event_io_failure(pdev, pci_channel_io_frozen); ++ break; ++ } + pci_dev_put(pdev); + } + diff --git a/patches.suse/s390-pci-implement-reset_slot-for-hotplug-slot b/patches.suse/s390-pci-implement-reset_slot-for-hotplug-slot new file mode 100644 index 0000000..74aa5b8 --- /dev/null +++ b/patches.suse/s390-pci-implement-reset_slot-for-hotplug-slot @@ -0,0 +1,166 @@ +From: Niklas Schnelle +Date: Thu, 1 Jul 2021 15:49:11 +0200 +Subject: s390/pci: implement reset_slot for hotplug slot +Git-commit: da995d538d3a17610d89fea0f5813cf7921b3c2c +Patch-mainline: v5.16-rc1 +References: jsc#PED-592 + +This is done by adding a zpci_hot_reset_device() call which does a low +level reset of the PCI function without changing its higher level +function state. This way it can be used while the zPCI function is bound +to a driver and with DMA tables being controlled either through the +IOMMU or DMA APIs, which is prohibited when using zpci_disable_device() +as that drops existing DMA translations. + +As this reset, unlike a normal FLR, also calls zpci_clear_irq(), we need +to implement arch_restore_msi_irqs() and make sure we re-enable IRQs for +the PCI function if they were previously disabled.
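+
+Editorial sketch, not part of the upstream commit message; condensed
+from the implementation below with error handling trimmed:
+
+	/* zpci_hot_reset_device(), condensed */
+	if (zdev_enabled(zdev))
+		zpci_disable_device(zdev);	/* resets device access, DMA and IRQs */
+	zpci_enable_device(zdev);
+	if (zdev->dma_table)			/* translations owned by the IOMMU API */
+		zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+				   (u64)zdev->dma_table);
+	else					/* translations owned by the DMA API */
+		zpci_dma_init_device(zdev);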
+ +Reviewed-by: Pierre Morel +Reviewed-by: Matthew Rosato +Signed-off-by: Niklas Schnelle +Signed-off-by: Vasily Gorbik +Acked-by: Petr Tesarik +--- + arch/s390/include/asm/pci.h | 1 + arch/s390/pci/pci.c | 59 +++++++++++++++++++++++++++++++++++++ + arch/s390/pci/pci_irq.c | 9 +++++ + drivers/pci/hotplug/s390_pci_hpc.c | 24 +++++++++++++++ + 4 files changed, 93 insertions(+) + +--- a/arch/s390/include/asm/pci.h ++++ b/arch/s390/include/asm/pci.h +@@ -210,6 +210,7 @@ int zpci_deconfigure_device(struct zpci_ + void zpci_device_reserved(struct zpci_dev *zdev); + bool zpci_is_device_configured(struct zpci_dev *zdev); + ++int zpci_hot_reset_device(struct zpci_dev *zdev); + int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); + int zpci_unregister_ioat(struct zpci_dev *, u8); + void zpci_remove_reserved_devices(void); +--- a/arch/s390/pci/pci.c ++++ b/arch/s390/pci/pci.c +@@ -724,6 +724,65 @@ int zpci_disable_device(struct zpci_dev + } + + /** ++ * zpci_hot_reset_device - perform a reset of the given zPCI function ++ * @zdev: the slot which should be reset ++ * ++ * Performs a low level reset of the zPCI function. The reset is low level in ++ * the sense that the zPCI function can be reset without detaching it from the ++ * common PCI subsystem. The reset may be performed while under control of ++ * either DMA or IOMMU APIs, in which case the existing DMA/IOMMU translation ++ * table is reinstated at the end of the reset. ++ * ++ * After the reset the function's internal state is reset to an initial state ++ * equivalent to its state during boot when first probing a driver. ++ * Consequently after reset the PCI function requires re-initialization via the ++ * common PCI code, including re-enabling IRQs via pci_alloc_irq_vectors() ++ * and enabling the function via e.g. pci_enable_device_flags(). The caller ++ * must guard against concurrent reset attempts. ++ * ++ * In most cases this function should not be called directly but through ++ * pci_reset_function() or pci_reset_bus(), which handle the save/restore and ++ * locking. ++ * ++ * Return: 0 on success and an error value otherwise ++ */ ++int zpci_hot_reset_device(struct zpci_dev *zdev) ++{ ++ int rc; ++ ++ zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh); ++ if (zdev_enabled(zdev)) { ++ /* Disables device access, DMAs and IRQs (reset state) */ ++ rc = zpci_disable_device(zdev); ++ /* ++ * Due to a z/VM vs LPAR inconsistency in the error state, the ++ * FH may indicate an enabled device but disable says the ++ * device is already disabled; don't treat it as an error here.
++ */ ++ if (rc == -EINVAL) ++ rc = 0; ++ if (rc) ++ return rc; ++ } ++ ++ rc = zpci_enable_device(zdev); ++ if (rc) ++ return rc; ++ ++ if (zdev->dma_table) ++ rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, ++ (u64)zdev->dma_table); ++ else ++ rc = zpci_dma_init_device(zdev); ++ if (rc) { ++ zpci_disable_device(zdev); ++ return rc; ++ } ++ ++ return 0; ++} ++ ++/** + * zpci_create_device() - Create a new zpci_dev and add it to the zbus + * @fid: Function ID of the device to be created + * @fh: Current Function Handle of the device to be created +--- a/arch/s390/pci/pci_irq.c ++++ b/arch/s390/pci/pci_irq.c +@@ -387,6 +387,15 @@ void arch_teardown_msi_irqs(struct pci_d + airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs); + } + ++void arch_restore_msi_irqs(struct pci_dev *pdev) ++{ ++ struct zpci_dev *zdev = to_zpci(pdev); ++ ++ if (!zdev->irqs_registered) ++ zpci_set_irq(zdev); ++ default_restore_msi_irqs(pdev); ++} ++ + static struct airq_struct zpci_airq = { + .handler = zpci_floating_irq_handler, + .isc = PCI_ISC, +--- a/drivers/pci/hotplug/s390_pci_hpc.c ++++ b/drivers/pci/hotplug/s390_pci_hpc.c +@@ -57,6 +57,29 @@ static int disable_slot(struct hotplug_s + return zpci_deconfigure_device(zdev); + } + ++static int reset_slot(struct hotplug_slot *hotplug_slot, bool probe) ++{ ++ struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, ++ hotplug_slot); ++ ++ if (zdev->state != ZPCI_FN_STATE_CONFIGURED) ++ return -EIO; ++ /* ++ * We can't take the zdev->lock as reset_slot may be called during ++ * probing and/or device removal, which already happens under the ++ * zdev->lock. Instead the user should use the higher level ++ * pci_reset_function() or pci_bus_reset(), which hold the PCI device ++ * lock, preventing concurrent removal. If not using these functions, ++ * holding the PCI device lock is required. ++ */ ++ ++ /* As long as the function is configured, we can reset */ ++ if (probe) ++ return 0; ++ ++ return zpci_hot_reset_device(zdev); ++} ++ + static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value) + { + struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, +@@ -76,6 +99,7 @@ static int get_adapter_status(struct hot + static const struct hotplug_slot_ops s390_hotplug_slot_ops = { + .enable_slot = enable_slot, + .disable_slot = disable_slot, ++ .reset_slot = reset_slot, + .get_power_status = get_power_status, + .get_adapter_status = get_adapter_status, + }; diff --git a/patches.suse/s390-pci-improve-DMA-translation-init-and-exit b/patches.suse/s390-pci-improve-DMA-translation-init-and-exit new file mode 100644 index 0000000..4ebd647 --- /dev/null +++ b/patches.suse/s390-pci-improve-DMA-translation-init-and-exit @@ -0,0 +1,317 @@ +From: Niklas Schnelle +Date: Fri, 16 Jul 2021 11:53:37 +0200 +Subject: s390/pci: improve DMA translation init and exit +Git-commit: 1f3f76812d5dfc791193b39c2140a8bd09962c0e +Patch-mainline: v5.15-rc1 +References: jsc#PED-592 + +Currently zpci_dma_init_device()/zpci_dma_exit_device() are called as +part of zpci_enable_device()/zpci_disable_device() and errors for +zpci_dma_exit_device() are always ignored even if we could abort. + +Improve upon this by moving zpci_dma_exit_device() out of +zpci_disable_device() and checking for errors whenever we have a way to +abort the current operation. Note that for example in +zpci_event_hard_deconfigured() the device is expected to be gone, so we +really can't abort and must proceed even in case of error.
+ +Similarly move the cc == 3 special case out of zpci_unregister_ioat() +and into the callers, allowing them to abort when finding an already disabled +device precludes proceeding with the operation. + +While we are at it, log IOAT register/unregister errors in the s390 +debugfs log. + +Reviewed-by: Matthew Rosato +Signed-off-by: Niklas Schnelle +Signed-off-by: Heiko Carstens +Acked-by: Petr Tesarik +--- + arch/s390/include/asm/pci.h | 2 + + arch/s390/include/asm/pci_dma.h | 2 - + arch/s390/pci/pci.c | 43 +++++++++++++++++----------------------- + arch/s390/pci/pci_bus.c | 5 ++++ + arch/s390/pci/pci_dma.c | 25 +++++++++++++++-------- + arch/s390/pci/pci_event.c | 5 +++- + arch/s390/pci/pci_sysfs.c | 19 ++++++++++++++--- + drivers/iommu/s390-iommu.c | 18 ++++++++++++---- + 8 files changed, 76 insertions(+), 43 deletions(-) + +--- a/arch/s390/include/asm/pci.h ++++ b/arch/s390/include/asm/pci.h +@@ -272,6 +272,8 @@ struct zpci_dev *get_zdev_by_fid(u32); + /* DMA */ + int zpci_dma_init(void); + void zpci_dma_exit(void); ++int zpci_dma_init_device(struct zpci_dev *zdev); ++int zpci_dma_exit_device(struct zpci_dev *zdev); + + /* IRQ */ + int __init zpci_irq_init(void); +--- a/arch/s390/include/asm/pci_dma.h ++++ b/arch/s390/include/asm/pci_dma.h +@@ -182,8 +182,6 @@ static inline unsigned long *get_st_pto( + } + + /* Prototypes */ +-int zpci_dma_init_device(struct zpci_dev *); +-void zpci_dma_exit_device(struct zpci_dev *); + void dma_free_seg_table(unsigned long); + unsigned long *dma_alloc_cpu_table(void); + void dma_cleanup_tables(unsigned long *); +--- a/arch/s390/pci/pci.c ++++ b/arch/s390/pci/pci.c +@@ -113,13 +113,16 @@ int zpci_register_ioat(struct zpci_dev * + { + u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT); + struct zpci_fib fib = {0}; +- u8 status; ++ u8 cc, status; + + WARN_ON_ONCE(iota & 0x3fff); + fib.pba = base; + fib.pal = limit; + fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; +- return zpci_mod_fc(req, &fib, &status) ? -EIO : 0; ++ cc = zpci_mod_fc(req, &fib, &status); ++ if (cc) ++ zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status); ++ return cc; + } + + /* Modify PCI: Unregister I/O address translation parameters */ +@@ -130,9 +133,9 @@ int zpci_unregister_ioat(struct zpci_dev + u8 cc, status; + + cc = zpci_mod_fc(req, &fib, &status); +- if (cc == 3) /* Function already gone. */ +- cc = 0; +- return cc ?
-EIO : 0; ++ if (cc) ++ zpci_dbg(3, "unreg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status); ++ return cc; + } + + /* Modify PCI: Set PCI function measurement parameters */ +@@ -660,24 +663,12 @@ void zpci_free_domain(int domain) + int zpci_enable_device(struct zpci_dev *zdev) + { + u32 fh = zdev->fh; +- int rc; ++ int rc = 0; + +- if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES)) { ++ if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES)) + rc = -EIO; +- goto out; +- } +- zdev->fh = fh; +- +- rc = zpci_dma_init_device(zdev); +- if (rc) +- goto out_dma; +- +- return 0; +- +-out_dma: +- clp_disable_fh(zdev, &fh); +-out: +- zdev->fh = fh; ++ else ++ zdev->fh = fh; + return rc; + } + +@@ -686,9 +677,6 @@ int zpci_disable_device(struct zpci_dev + u32 fh = zdev->fh; + int cc, rc = 0; + +- zpci_dma_exit_device(zdev); +- if (!zdev_enabled(zdev)) +- return 0; + cc = clp_disable_fh(zdev, &fh); + if (!cc) { + zdev->fh = fh; +@@ -814,6 +802,11 @@ int zpci_deconfigure_device(struct zpci_ + if (zdev->zbus->bus) + zpci_bus_remove_device(zdev, false); + ++ if (zdev->dma_table) { ++ rc = zpci_dma_exit_device(zdev); ++ if (rc) ++ return rc; ++ } + if (zdev_enabled(zdev)) { + rc = zpci_disable_device(zdev); + if (rc) +@@ -837,6 +830,8 @@ void zpci_release_device(struct kref *kr + if (zdev->zbus->bus) + zpci_bus_remove_device(zdev, false); + ++ if (zdev->dma_table) ++ zpci_dma_exit_device(zdev); + if (zdev_enabled(zdev)) + zpci_disable_device(zdev); + +--- a/arch/s390/pci/pci_bus.c ++++ b/arch/s390/pci/pci_bus.c +@@ -49,6 +49,11 @@ static int zpci_bus_prepare_device(struc + rc = zpci_enable_device(zdev); + if (rc) + return rc; ++ rc = zpci_dma_init_device(zdev); ++ if (rc) { ++ zpci_disable_device(zdev); ++ return rc; ++ } + } + + if (!zdev->has_resources) { +--- a/arch/s390/pci/pci_dma.c ++++ b/arch/s390/pci/pci_dma.c +@@ -590,10 +590,11 @@ int zpci_dma_init_device(struct zpci_dev + } + + } +- rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, +- (u64) zdev->dma_table); +- if (rc) ++ if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, ++ (u64)zdev->dma_table)) { ++ rc = -EIO; + goto free_bitmap; ++ } + + return 0; + free_bitmap: +@@ -608,17 +609,25 @@ out: + return rc; + } + +-void zpci_dma_exit_device(struct zpci_dev *zdev) ++int zpci_dma_exit_device(struct zpci_dev *zdev) + { ++ int cc = 0; ++ + /* + * At this point, if the device is part of an IOMMU domain, this would + * be a strong hint towards a bug in the IOMMU API (common) code and/or + * simultaneous access via IOMMU and DMA API. So let's issue a warning. + */ + WARN_ON(zdev->s390_domain); +- +- if (zpci_unregister_ioat(zdev, 0)) +- return; ++ if (zdev_enabled(zdev)) ++ cc = zpci_unregister_ioat(zdev, 0); ++ /* ++ * cc == 3 indicates the function is gone already. This can happen ++ * if the function was deconfigured/disabled suddenly and we have not ++ * received a new handle yet. ++ */ ++ if (cc && cc != 3) ++ return -EIO; + + dma_cleanup_tables(zdev->dma_table); + zdev->dma_table = NULL; +@@ -626,8 +635,8 @@ void zpci_dma_exit_device(struct zpci_de + zdev->iommu_bitmap = NULL; + vfree(zdev->lazy_bitmap); + zdev->lazy_bitmap = NULL; +- + zdev->next_bit = 0; ++ return 0; + } + + static int __init dma_alloc_cpu_table_caches(void) +--- a/arch/s390/pci/pci_event.c ++++ b/arch/s390/pci/pci_event.c +@@ -84,7 +84,10 @@ static void zpci_event_hard_deconfigured + /* Even though the device is already gone we still + * need to free zPCI resources as part of the disable. 
+ */ +- zpci_disable_device(zdev); ++ if (zdev->dma_table) ++ zpci_dma_exit_device(zdev); ++ if (zdev_enabled(zdev)) ++ zpci_disable_device(zdev); + zdev->state = ZPCI_FN_STATE_STANDBY; + } + +--- a/arch/s390/pci/pci_sysfs.c ++++ b/arch/s390/pci/pci_sysfs.c +@@ -82,13 +82,26 @@ static ssize_t recover_store(struct devi + pci_lock_rescan_remove(); + if (pci_dev_is_added(pdev)) { + pci_stop_and_remove_bus_device(pdev); +- ret = zpci_disable_device(zdev); +- if (ret) +- goto out; ++ if (zdev->dma_table) { ++ ret = zpci_dma_exit_device(zdev); ++ if (ret) ++ goto out; ++ } ++ ++ if (zdev_enabled(zdev)) { ++ ret = zpci_disable_device(zdev); ++ if (ret) ++ goto out; ++ } + + ret = zpci_enable_device(zdev); + if (ret) + goto out; ++ ret = zpci_dma_init_device(zdev); ++ if (ret) { ++ zpci_disable_device(zdev); ++ goto out; ++ } + pci_rescan_bus(zdev->zbus->bus); + } + out: +--- a/drivers/iommu/s390-iommu.c ++++ b/drivers/iommu/s390-iommu.c +@@ -90,7 +90,7 @@ static int s390_iommu_attach_device(stru + struct zpci_dev *zdev = to_zpci_dev(dev); + struct s390_domain_device *domain_device; + unsigned long flags; +- int rc; ++ int cc, rc; + + if (!zdev) + return -ENODEV; +@@ -99,14 +99,21 @@ static int s390_iommu_attach_device(stru + if (!domain_device) + return -ENOMEM; + +- if (zdev->dma_table) +- zpci_dma_exit_device(zdev); ++ if (zdev->dma_table) { ++ cc = zpci_dma_exit_device(zdev); ++ if (cc) { ++ rc = -EIO; ++ goto out_free; ++ } ++ } + + zdev->dma_table = s390_domain->dma_table; +- rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, ++ cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, + (u64) zdev->dma_table); +- if (rc) ++ if (cc) { ++ rc = -EIO; + goto out_restore; ++ } + + spin_lock_irqsave(&s390_domain->list_lock, flags); + /* First device defines the DMA range limits */ +@@ -130,6 +137,7 @@ static int s390_iommu_attach_device(stru + + out_restore: + zpci_dma_init_device(zdev); ++out_free: + kfree(domain_device); + + return rc; diff --git a/patches.suse/s390-pci-refresh-function-handle-in-iomap b/patches.suse/s390-pci-refresh-function-handle-in-iomap new file mode 100644 index 0000000..f064725 --- /dev/null +++ b/patches.suse/s390-pci-refresh-function-handle-in-iomap @@ -0,0 +1,177 @@ +From: Niklas Schnelle +Date: Wed, 7 Jul 2021 10:42:43 +0200 +Subject: s390/pci: refresh function handle in iomap +Git-commit: 4fe204977096e900cb91a3298b05c794ac24f540 +Patch-mainline: v5.16-rc1 +References: jsc#PED-592 + +The function handle of a PCI function is updated when disabling or +enabling it as well as when the function's availability changes or it +enters the error state. + +Until now this only occurred either while there was no struct pci_dev +associated with the function yet or after the function became unavailable. +This meant that leaving a stale function handle in the iomap either +didn't happen because there was no iomap yet or it led to errors on PCI +access, but so would the correct disabled function handle. + +In the future a CLP Set PCI Function Disable/Enable cycle during PCI +device recovery may be done while the device is bound to a driver. In +this case we must update the iomap associated with the now-stale +function handle to ensure that the resulting zPCI instruction references +an accurate function handle. + +Since the function handle is accessed by the PCI accessor helpers +without locking, use READ_ONCE()/WRITE_ONCE() to mark this access and +prevent compiler optimizations that would move the load/store.
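+
+Editorial illustration, taken from the hunks below: the accessor side
+then reads the handle as
+
+	u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);
+
+while zpci_do_update_iomap_fh() publishes a new handle per BAR with
+
+	WRITE_ONCE(zpci_iomap_start[idx].fh, zdev->fh);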
+ +With that infrastructure in place let's also properly update the +function handle in the existing cases. This makes sure that in the +future debugging of a zPCI function access through the handle will +show an up to date handle reducing the chance of confusion. Also it +makes sure we have one single place where a zPCI function handle is +updated after initialization. + +Reviewed-by: Pierre Morel +Reviewed-by: Matthew Rosato +Signed-off-by: Niklas Schnelle +Signed-off-by: Vasily Gorbik +Acked-by: Petr Tesarik +--- + arch/s390/include/asm/pci.h | 1 + + arch/s390/pci/pci.c | 36 ++++++++++++++++++++++++++++++++---- + arch/s390/pci/pci_event.c | 6 +++--- + arch/s390/pci/pci_insn.c | 4 ++-- + 4 files changed, 38 insertions(+), 9 deletions(-) + +--- a/arch/s390/include/asm/pci.h ++++ b/arch/s390/include/asm/pci.h +@@ -213,6 +213,7 @@ bool zpci_is_device_configured(struct zp + int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); + int zpci_unregister_ioat(struct zpci_dev *, u8); + void zpci_remove_reserved_devices(void); ++void zpci_update_fh(struct zpci_dev *zdev, u32 fh); + + /* CLP */ + int clp_setup_writeback_mio(void); +--- a/arch/s390/pci/pci.c ++++ b/arch/s390/pci/pci.c +@@ -481,6 +481,34 @@ static void zpci_free_iomap(struct zpci_ + spin_unlock(&zpci_iomap_lock); + } + ++static void zpci_do_update_iomap_fh(struct zpci_dev *zdev, u32 fh) ++{ ++ int bar, idx; ++ ++ spin_lock(&zpci_iomap_lock); ++ for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { ++ if (!zdev->bars[bar].size) ++ continue; ++ idx = zdev->bars[bar].map_idx; ++ if (!zpci_iomap_start[idx].count) ++ continue; ++ WRITE_ONCE(zpci_iomap_start[idx].fh, zdev->fh); ++ } ++ spin_unlock(&zpci_iomap_lock); ++} ++ ++void zpci_update_fh(struct zpci_dev *zdev, u32 fh) ++{ ++ if (!fh || zdev->fh == fh) ++ return; ++ ++ zdev->fh = fh; ++ if (zpci_use_mio(zdev)) ++ return; ++ if (zdev->has_resources && zdev_enabled(zdev)) ++ zpci_do_update_iomap_fh(zdev, fh); ++} ++ + static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start, + unsigned long size, unsigned long flags) + { +@@ -668,7 +696,7 @@ int zpci_enable_device(struct zpci_dev * + if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES)) + rc = -EIO; + else +- zdev->fh = fh; ++ zpci_update_fh(zdev, fh); + return rc; + } + +@@ -679,14 +707,14 @@ int zpci_disable_device(struct zpci_dev + + cc = clp_disable_fh(zdev, &fh); + if (!cc) { +- zdev->fh = fh; ++ zpci_update_fh(zdev, fh); + } else if (cc == CLP_RC_SETPCIFN_ALRDY) { + pr_info("Disabling PCI function %08x had no effect as it was already disabled\n", + zdev->fid); + /* Function is already disabled - update handle */ + rc = clp_refresh_fh(zdev->fid, &fh); + if (!rc) { +- zdev->fh = fh; ++ zpci_update_fh(zdev, fh); + rc = -EINVAL; + } + } else { +@@ -776,7 +804,7 @@ int zpci_scan_configured_device(struct z + { + int rc; + +- zdev->fh = fh; ++ zpci_update_fh(zdev, fh); + /* the PCI function will be scanned once function 0 appears */ + if (!zdev->zbus->bus) + return 0; +--- a/arch/s390/pci/pci_event.c ++++ b/arch/s390/pci/pci_event.c +@@ -76,7 +76,7 @@ void zpci_event_error(void *data) + + static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) + { +- zdev->fh = fh; ++ zpci_update_fh(zdev, fh); + /* Give the driver a hint that the function is + * already unusable. 
+ */ +@@ -117,7 +117,7 @@ static void __zpci_event_availability(st + if (!zdev) + zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY); + else +- zdev->fh = ccdf->fh; ++ zpci_update_fh(zdev, ccdf->fh); + break; + case 0x0303: /* Deconfiguration requested */ + if (zdev) { +@@ -126,7 +126,7 @@ static void __zpci_event_availability(st + */ + if (zdev->state != ZPCI_FN_STATE_CONFIGURED) + break; +- zdev->fh = ccdf->fh; ++ zpci_update_fh(zdev, ccdf->fh); + zpci_deconfigure_device(zdev); + } + break; +--- a/arch/s390/pci/pci_insn.c ++++ b/arch/s390/pci/pci_insn.c +@@ -163,7 +163,7 @@ static inline int zpci_load_fh(u64 *data + unsigned long len) + { + struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; +- u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len); ++ u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len); + + return __zpci_load(data, req, ZPCI_OFFSET(addr)); + } +@@ -244,7 +244,7 @@ static inline int zpci_store_fh(const vo + unsigned long len) + { + struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; +- u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len); ++ u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len); + + return __zpci_store(data, req, ZPCI_OFFSET(addr)); + } diff --git a/patches.suse/s390-pci-simplify-CLP-List-PCI-handling b/patches.suse/s390-pci-simplify-CLP-List-PCI-handling new file mode 100644 index 0000000..e145f6d --- /dev/null +++ b/patches.suse/s390-pci-simplify-CLP-List-PCI-handling @@ -0,0 +1,358 @@ +From: Niklas Schnelle +Date: Thu, 22 Jul 2021 11:44:08 +0200 +Subject: s390/pci: simplify CLP List PCI handling +Git-commit: cc049eecfb7adc4bfecd05eb25e425d8def96fce +Patch-mainline: v5.15-rc1 +References: jsc#PED-592 + +Currently clp_get_state() and clp_refresh_fh() awkwardly use the +clp_list_pci() callback mechanism to find the entry for a specific FID +and update its zdev or return its state, respectively. + +This is both needlessly complex and means we are always going through +the entire PCI function list even if the FID has already been found. +Instead let's introduce a clp_find_pci() function to find a specific +entry and share the CLP List PCI request handling code with +clp_list_pci(). + +With that in place we can also easily make the function handle a simple +out parameter instead of directly altering the zdev, allowing easier +access to the updated function handle by the caller.
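+
+Editorial sketch, not part of the upstream commit message: both
+clp_list_pci() and the new clp_find_pci() below share this resume-token
+pagination pattern, with clp_find_pci() able to stop early (rrb, fid and
+entry are the clp_find_pci() parameters):
+
+	u64 resume_token = 0;
+	int nentries, i, rc;
+
+	do {
+		rc = clp_list_pci_req(rrb, &resume_token, &nentries);
+		if (rc)
+			return rc;
+		for (i = 0; i < nentries; i++)
+			if (rrb->response.fh_list[i].fid == fid) {
+				*entry = rrb->response.fh_list[i];
+				return 0;	/* found, skip remaining pages */
+			}
+	} while (resume_token);
+	return -ENODEV;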
+ +Reviewed-by: Matthew Rosato +Signed-off-by: Niklas Schnelle +Signed-off-by: Heiko Carstens +Acked-by: Petr Tesarik +--- + arch/s390/include/asm/pci.h | 6 - + arch/s390/pci/pci.c | 28 ++++--- + arch/s390/pci/pci_clp.c | 155 +++++++++++++++++++++++--------------------- + 3 files changed, 102 insertions(+), 87 deletions(-) + +--- a/arch/s390/include/asm/pci.h ++++ b/arch/s390/include/asm/pci.h +@@ -216,10 +216,10 @@ void zpci_remove_reserved_devices(void); + int clp_setup_writeback_mio(void); + int clp_scan_pci_devices(void); + int clp_query_pci_fn(struct zpci_dev *zdev); +-int clp_enable_fh(struct zpci_dev *, u8); +-int clp_disable_fh(struct zpci_dev *); ++int clp_enable_fh(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as); ++int clp_disable_fh(struct zpci_dev *zdev, u32 *fh); + int clp_get_state(u32 fid, enum zpci_state *state); +-int clp_refresh_fh(u32 fid); ++int clp_refresh_fh(u32 fid, u32 *fh); + + /* UID */ + void update_uid_checking(bool new); +--- a/arch/s390/pci/pci.c ++++ b/arch/s390/pci/pci.c +@@ -659,12 +659,14 @@ void zpci_free_domain(int domain) + + int zpci_enable_device(struct zpci_dev *zdev) + { ++ u32 fh = zdev->fh; + int rc; + +- if (clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES)) { ++ if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES)) { + rc = -EIO; + goto out; + } ++ zdev->fh = fh; + + rc = zpci_dma_init_device(zdev); + if (rc) +@@ -673,29 +675,33 @@ int zpci_enable_device(struct zpci_dev * + return 0; + + out_dma: +- clp_disable_fh(zdev); ++ clp_disable_fh(zdev, &fh); + out: ++ zdev->fh = fh; + return rc; + } + + int zpci_disable_device(struct zpci_dev *zdev) + { ++ u32 fh = zdev->fh; + int cc, rc = 0; + + zpci_dma_exit_device(zdev); +- /* +- * The zPCI function may already be disabled by the platform, this is +- * detected in clp_disable_fh() which becomes a no-op. +- */ +- cc = clp_disable_fh(zdev); +- if (cc == CLP_RC_SETPCIFN_ALRDY) { ++ if (!zdev_enabled(zdev)) ++ return 0; ++ cc = clp_disable_fh(zdev, &fh); ++ if (!cc) { ++ zdev->fh = fh; ++ } else if (cc == CLP_RC_SETPCIFN_ALRDY) { + pr_info("Disabling PCI function %08x had no effect as it was already disabled\n", + zdev->fid); + /* Function is already disabled - update handle */ +- rc = clp_refresh_fh(zdev->fid); +- if (!rc) ++ rc = clp_refresh_fh(zdev->fid, &fh); ++ if (!rc) { ++ zdev->fh = fh; + rc = -EINVAL; +- } else if (cc) { ++ } ++ } else { + rc = -EIO; + } + return rc; +--- a/arch/s390/pci/pci_clp.c ++++ b/arch/s390/pci/pci_clp.c +@@ -215,17 +215,19 @@ out: + /** + * clp_set_pci_fn() - Execute a command on a PCI function + * @zdev: Function that will be affected ++ * @fh: Out parameter for updated function handle + * @nr_dma_as: DMA address space number + * @command: The command code to execute + * + * Returns: 0 on success, < 0 for Linux errors (e.g. 
-ENOMEM), and + * > 0 for non-success platform responses + */ +-static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command) ++static int clp_set_pci_fn(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as, u8 command) + { + struct clp_req_rsp_set_pci *rrb; + int rc, retries = 100; + ++ *fh = 0; + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; +@@ -249,7 +251,7 @@ static int clp_set_pci_fn(struct zpci_de + } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY); + + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { +- zdev->fh = rrb->response.fh; ++ *fh = rrb->response.fh; + } else { + zpci_err("Set PCI FN:\n"); + zpci_err_clp(rrb->response.hdr.rsp, rc); +@@ -294,31 +296,62 @@ int clp_setup_writeback_mio(void) + return rc; + } + +-int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) ++int clp_enable_fh(struct zpci_dev *zdev, u32 *fh, u8 nr_dma_as) + { + int rc; + +- rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_PCI_FN); +- zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); ++ rc = clp_set_pci_fn(zdev, fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN); ++ zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, *fh, rc); + if (!rc && zpci_use_mio(zdev)) { +- rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_MIO); ++ rc = clp_set_pci_fn(zdev, fh, nr_dma_as, CLP_SET_ENABLE_MIO); + zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", +- zdev->fid, zdev->fh, rc); ++ zdev->fid, *fh, rc); + if (rc) +- clp_disable_fh(zdev); ++ clp_disable_fh(zdev, fh); + } + return rc; + } + +-int clp_disable_fh(struct zpci_dev *zdev) ++int clp_disable_fh(struct zpci_dev *zdev, u32 *fh) + { + int rc; + + if (!zdev_enabled(zdev)) + return 0; + +- rc = clp_set_pci_fn(zdev, 0, CLP_SET_DISABLE_PCI_FN); +- zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); ++ rc = clp_set_pci_fn(zdev, fh, 0, CLP_SET_DISABLE_PCI_FN); ++ zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, *fh, rc); ++ return rc; ++} ++ ++static int clp_list_pci_req(struct clp_req_rsp_list_pci *rrb, ++ u64 *resume_token, int *nentries) ++{ ++ int rc; ++ ++ memset(rrb, 0, sizeof(*rrb)); ++ rrb->request.hdr.len = sizeof(rrb->request); ++ rrb->request.hdr.cmd = CLP_LIST_PCI; ++ /* store as many entries as possible */ ++ rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN; ++ rrb->request.resume_token = *resume_token; ++ ++ /* Get PCI function handle list */ ++ rc = clp_req(rrb, CLP_LPS_PCI); ++ if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { ++ zpci_err("List PCI FN:\n"); ++ zpci_err_clp(rrb->response.hdr.rsp, rc); ++ return -EIO; ++ } ++ ++ update_uid_checking(rrb->response.uid_checking); ++ WARN_ON_ONCE(rrb->response.entry_size != ++ sizeof(struct clp_fh_list_entry)); ++ ++ *nentries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) / ++ rrb->response.entry_size; ++ *resume_token = rrb->response.resume_token; ++ + return rc; + } + +@@ -326,38 +359,40 @@ static int clp_list_pci(struct clp_req_r + void (*cb)(struct clp_fh_list_entry *, void *)) + { + u64 resume_token = 0; +- int entries, i, rc; ++ int nentries, i, rc; + + do { +- memset(rrb, 0, sizeof(*rrb)); +- rrb->request.hdr.len = sizeof(rrb->request); +- rrb->request.hdr.cmd = CLP_LIST_PCI; +- /* store as many entries as possible */ +- rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN; +- rrb->request.resume_token = resume_token; +- +- /* Get PCI function handle list */ +- rc = clp_req(rrb, CLP_LPS_PCI); +- if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { +- zpci_err("List PCI FN:\n"); +- zpci_err_clp(rrb->response.hdr.rsp, rc); +- rc = -EIO; +- goto out; 
+- } ++ rc = clp_list_pci_req(rrb, &resume_token, &nentries); ++ if (rc) ++ return rc; ++ for (i = 0; i < nentries; i++) ++ cb(&rrb->response.fh_list[i], data); ++ } while (resume_token); + +- update_uid_checking(rrb->response.uid_checking); +- WARN_ON_ONCE(rrb->response.entry_size != +- sizeof(struct clp_fh_list_entry)); ++ return rc; ++} + +- entries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) / +- rrb->response.entry_size; ++static int clp_find_pci(struct clp_req_rsp_list_pci *rrb, u32 fid, ++ struct clp_fh_list_entry *entry) ++{ ++ struct clp_fh_list_entry *fh_list; ++ u64 resume_token = 0; ++ int nentries, i, rc; + +- resume_token = rrb->response.resume_token; +- for (i = 0; i < entries; i++) +- cb(&rrb->response.fh_list[i], data); ++ do { ++ rc = clp_list_pci_req(rrb, &resume_token, &nentries); ++ if (rc) ++ return rc; ++ for (i = 0; i < nentries; i++) { ++ fh_list = rrb->response.fh_list; ++ if (fh_list[i].fid == fid) { ++ *entry = fh_list[i]; ++ return 0; ++ } ++ } + } while (resume_token); +-out: +- return rc; ++ ++ return -ENODEV; + } + + static void __clp_add(struct clp_fh_list_entry *entry, void *data) +@@ -387,67 +422,41 @@ int clp_scan_pci_devices(void) + return rc; + } + +-static void __clp_refresh_fh(struct clp_fh_list_entry *entry, void *data) +-{ +- struct zpci_dev *zdev; +- u32 fid = *((u32 *)data); +- +- if (!entry->vendor_id || fid != entry->fid) +- return; +- +- zdev = get_zdev_by_fid(fid); +- if (!zdev) +- return; +- +- zdev->fh = entry->fh; +-} +- + /* +- * Refresh the function handle of the function matching @fid ++ * Get the current function handle of the function matching @fid + */ +-int clp_refresh_fh(u32 fid) ++int clp_refresh_fh(u32 fid, u32 *fh) + { + struct clp_req_rsp_list_pci *rrb; ++ struct clp_fh_list_entry entry; + int rc; + + rrb = clp_alloc_block(GFP_NOWAIT); + if (!rrb) + return -ENOMEM; + +- rc = clp_list_pci(rrb, &fid, __clp_refresh_fh); ++ rc = clp_find_pci(rrb, fid, &entry); ++ if (!rc) ++ *fh = entry.fh; + + clp_free_block(rrb); + return rc; + } + +-struct clp_state_data { +- u32 fid; +- enum zpci_state state; +-}; +- +-static void __clp_get_state(struct clp_fh_list_entry *entry, void *data) +-{ +- struct clp_state_data *sd = data; +- +- if (entry->fid != sd->fid) +- return; +- +- sd->state = entry->config_state; +-} +- + int clp_get_state(u32 fid, enum zpci_state *state) + { + struct clp_req_rsp_list_pci *rrb; +- struct clp_state_data sd = {fid, ZPCI_FN_STATE_RESERVED}; ++ struct clp_fh_list_entry entry; + int rc; + ++ *state = ZPCI_FN_STATE_RESERVED; + rrb = clp_alloc_block(GFP_ATOMIC); + if (!rrb) + return -ENOMEM; + +- rc = clp_list_pci(rrb, &sd, __clp_get_state); ++ rc = clp_find_pci(rrb, fid, &entry); + if (!rc) +- *state = sd.state; ++ *state = entry.config_state; + + clp_free_block(rrb); + return rc; diff --git a/patches.suse/s390-remove-xpram-device-driver.patch b/patches.suse/s390-remove-xpram-device-driver.patch new file mode 100644 index 0000000..155e701 --- /dev/null +++ b/patches.suse/s390-remove-xpram-device-driver.patch @@ -0,0 +1,507 @@ +From: Heiko Carstens +Date: Mon, 6 Sep 2021 13:59:26 +0200 +Subject: [PATCH] s390: remove xpram device driver +Git-commit: 68c32eb2707aed0a3be1a60b0f206943a25e8f34 +Patch-Mainline: v5.15 +References: bsc#1205381 + +Support for expanded storage was only available until z13 and z/VM 6.3, +respectively. However, there haven't been any use cases for this device +driver for a long time. +Therefore remove it.
+ +Acked-by: Christian Borntraeger +Signed-off-by: Heiko Carstens +Acked-by: Hannes Reinecke +--- + arch/s390/configs/defconfig | 1 - + arch/s390/configs/zfcpdump_defconfig | 1 - + drivers/s390/block/Kconfig | 11 - + drivers/s390/block/Makefile | 1 - + drivers/s390/block/xpram.c | 416 --------------------------- + 5 files changed, 430 deletions(-) + delete mode 100644 drivers/s390/block/xpram.c + +diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig +index e1642d2cba59..56a1cc85c5d7 100644 +--- a/arch/s390/configs/defconfig ++++ b/arch/s390/configs/defconfig +@@ -397,7 +397,6 @@ CONFIG_BLK_DEV_DRBD=m + CONFIG_BLK_DEV_NBD=m + CONFIG_BLK_DEV_RAM=y + CONFIG_BLK_DEV_RAM_SIZE=32768 +-# CONFIG_BLK_DEV_XPRAM is not set + CONFIG_VIRTIO_BLK=y + CONFIG_BLK_DEV_RBD=m + CONFIG_BLK_DEV_NVME=m +diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig +index d576aaab27c9..aceccf3b9a88 100644 +--- a/arch/s390/configs/zfcpdump_defconfig ++++ b/arch/s390/configs/zfcpdump_defconfig +@@ -35,7 +35,6 @@ CONFIG_NET=y + # CONFIG_ETHTOOL_NETLINK is not set + CONFIG_DEVTMPFS=y + CONFIG_BLK_DEV_RAM=y +-# CONFIG_BLK_DEV_XPRAM is not set + # CONFIG_DCSSBLK is not set + # CONFIG_DASD is not set + CONFIG_ENCLOSURE_SERVICES=y +diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig +index 376f1efbbb86..d0416dbd0cd8 100644 +--- a/drivers/s390/block/Kconfig ++++ b/drivers/s390/block/Kconfig +@@ -2,17 +2,6 @@ + comment "S/390 block device drivers" + depends on S390 && BLOCK + +-config BLK_DEV_XPRAM +- def_tristate m +- prompt "XPRAM disk support" +- depends on S390 && BLOCK +- help +- Select this option if you want to use your expanded storage on S/390 +- or zSeries as a disk. This is useful as a _fast_ swap device if you +- want to access more than 2G of memory when running in 31 bit mode. +- This option is also available as a module which will be called +- xpram. If unsure, say "N". +- + config DCSSBLK + def_tristate m + select FS_DAX_LIMITED +diff --git a/drivers/s390/block/Makefile b/drivers/s390/block/Makefile +index 60c85cff556f..a0a54d2f063f 100644 +--- a/drivers/s390/block/Makefile ++++ b/drivers/s390/block/Makefile +@@ -16,7 +16,6 @@ obj-$(CONFIG_DASD) += dasd_mod.o + obj-$(CONFIG_DASD_DIAG) += dasd_diag_mod.o + obj-$(CONFIG_DASD_ECKD) += dasd_eckd_mod.o + obj-$(CONFIG_DASD_FBA) += dasd_fba_mod.o +-obj-$(CONFIG_BLK_DEV_XPRAM) += xpram.o + obj-$(CONFIG_DCSSBLK) += dcssblk.o + + scm_block-objs := scm_drv.o scm_blk.o +diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c +deleted file mode 100644 +index ce98fab4d43c..000000000000 +--- a/drivers/s390/block/xpram.c ++++ /dev/null +@@ -1,416 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0 +-/* +- * Xpram.c -- the S/390 expanded memory RAM-disk +- * +- * significant parts of this code are based on +- * the sbull device driver presented in +- * A. Rubini: Linux Device Drivers +- * +- * Author of XPRAM specific coding: Reinhard Buendgen +- * buendgen@de.ibm.com +- * Rewrite for 2.5: Martin Schwidefsky +- * +- * External interfaces: +- * Interfaces to linux kernel +- * xpram_setup: read kernel parameters +- * Device specific file operations +- * xpram_iotcl +- * xpram_open +- * +- * "ad-hoc" partitioning: +- * the expanded memory can be partitioned among several devices +- * (with different minors). 
The partitioning set up can be +- * set by kernel or module parameters (int devs & int sizes[]) +- * +- * Potential future improvements: +- * generic hard disk support to replace ad-hoc partitioning +- */ +- +-#define KMSG_COMPONENT "xpram" +-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +- +-#include +-#include +-#include /* isdigit, isxdigit */ +-#include +-#include +-#include +-#include +-#include /* HDIO_GETGEO */ +-#include +-#include +-#include +-#include +- +-#define XPRAM_NAME "xpram" +-#define XPRAM_DEVS 1 /* one partition */ +-#define XPRAM_MAX_DEVS 32 /* maximal number of devices (partitions) */ +- +-typedef struct { +- unsigned int size; /* size of xpram segment in pages */ +- unsigned int offset; /* start page of xpram segment */ +-} xpram_device_t; +- +-static xpram_device_t xpram_devices[XPRAM_MAX_DEVS]; +-static unsigned int xpram_sizes[XPRAM_MAX_DEVS]; +-static struct gendisk *xpram_disks[XPRAM_MAX_DEVS]; +-static unsigned int xpram_pages; +-static int xpram_devs; +- +-/* +- * Parameter parsing functions. +- */ +-static int devs = XPRAM_DEVS; +-static char *sizes[XPRAM_MAX_DEVS]; +- +-module_param(devs, int, 0); +-module_param_array(sizes, charp, NULL, 0); +- +-MODULE_PARM_DESC(devs, "number of devices (\"partitions\"), " \ +- "the default is " __MODULE_STRING(XPRAM_DEVS) "\n"); +-MODULE_PARM_DESC(sizes, "list of device (partition) sizes " \ +- "the defaults are 0s \n" \ +- "All devices with size 0 equally partition the " +- "remaining space on the expanded strorage not " +- "claimed by explicit sizes\n"); +-MODULE_LICENSE("GPL"); +- +-/* +- * Copy expanded memory page (4kB) into main memory +- * Arguments +- * page_addr: address of target page +- * xpage_index: index of expandeded memory page +- * Return value +- * 0: if operation succeeds +- * -EIO: if pgin failed +- * -ENXIO: if xpram has vanished +- */ +-static int xpram_page_in (unsigned long page_addr, unsigned int xpage_index) +-{ +- int cc = 2; /* return unused cc 2 if pgin traps */ +- +- asm volatile( +- " .insn rre,0xb22e0000,%1,%2\n" /* pgin %1,%2 */ +- "0: ipm %0\n" +- " srl %0,28\n" +- "1:\n" +- EX_TABLE(0b,1b) +- : "+d" (cc) : "a" (__pa(page_addr)), "d" (xpage_index) : "cc"); +- if (cc == 3) +- return -ENXIO; +- if (cc == 2) +- return -ENXIO; +- if (cc == 1) +- return -EIO; +- return 0; +-} +- +-/* +- * Copy a 4kB page of main memory to an expanded memory page +- * Arguments +- * page_addr: address of source page +- * xpage_index: index of expandeded memory page +- * Return value +- * 0: if operation succeeds +- * -EIO: if pgout failed +- * -ENXIO: if xpram has vanished +- */ +-static long xpram_page_out (unsigned long page_addr, unsigned int xpage_index) +-{ +- int cc = 2; /* return unused cc 2 if pgin traps */ +- +- asm volatile( +- " .insn rre,0xb22f0000,%1,%2\n" /* pgout %1,%2 */ +- "0: ipm %0\n" +- " srl %0,28\n" +- "1:\n" +- EX_TABLE(0b,1b) +- : "+d" (cc) : "a" (__pa(page_addr)), "d" (xpage_index) : "cc"); +- if (cc == 3) +- return -ENXIO; +- if (cc == 2) +- return -ENXIO; +- if (cc == 1) +- return -EIO; +- return 0; +-} +- +-/* +- * Check if xpram is available. +- */ +-static int __init xpram_present(void) +-{ +- unsigned long mem_page; +- int rc; +- +- mem_page = (unsigned long) __get_free_page(GFP_KERNEL); +- if (!mem_page) +- return -ENOMEM; +- rc = xpram_page_in(mem_page, 0); +- free_page(mem_page); +- return rc ? -ENXIO : 0; +-} +- +-/* +- * Return index of the last available xpram page. 
+- */ +-static unsigned long __init xpram_highest_page_index(void) +-{ +- unsigned int page_index, add_bit; +- unsigned long mem_page; +- +- mem_page = (unsigned long) __get_free_page(GFP_KERNEL); +- if (!mem_page) +- return 0; +- +- page_index = 0; +- add_bit = 1ULL << (sizeof(unsigned int)*8 - 1); +- while (add_bit > 0) { +- if (xpram_page_in(mem_page, page_index | add_bit) == 0) +- page_index |= add_bit; +- add_bit >>= 1; +- } +- +- free_page (mem_page); +- +- return page_index; +-} +- +-/* +- * Block device make request function. +- */ +-static blk_qc_t xpram_submit_bio(struct bio *bio) +-{ +- xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data; +- struct bio_vec bvec; +- struct bvec_iter iter; +- unsigned int index; +- unsigned long page_addr; +- unsigned long bytes; +- +- blk_queue_split(&bio); +- +- if ((bio->bi_iter.bi_sector & 7) != 0 || +- (bio->bi_iter.bi_size & 4095) != 0) +- /* Request is not page-aligned. */ +- goto fail; +- if ((bio->bi_iter.bi_size >> 12) > xdev->size) +- /* Request size is no page-aligned. */ +- goto fail; +- if ((bio->bi_iter.bi_sector >> 3) > 0xffffffffU - xdev->offset) +- goto fail; +- index = (bio->bi_iter.bi_sector >> 3) + xdev->offset; +- bio_for_each_segment(bvec, bio, iter) { +- page_addr = (unsigned long) +- kmap(bvec.bv_page) + bvec.bv_offset; +- bytes = bvec.bv_len; +- if ((page_addr & 4095) != 0 || (bytes & 4095) != 0) +- /* More paranoia. */ +- goto fail; +- while (bytes > 0) { +- if (bio_data_dir(bio) == READ) { +- if (xpram_page_in(page_addr, index) != 0) +- goto fail; +- } else { +- if (xpram_page_out(page_addr, index) != 0) +- goto fail; +- } +- page_addr += 4096; +- bytes -= 4096; +- index++; +- } +- } +- bio_endio(bio); +- return BLK_QC_T_NONE; +-fail: +- bio_io_error(bio); +- return BLK_QC_T_NONE; +-} +- +-static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo) +-{ +- unsigned long size; +- +- /* +- * get geometry: we have to fake one... trim the size to a +- * multiple of 64 (32k): tell we have 16 sectors, 4 heads, +- * whatever cylinders. Tell also that data starts at sector. 4. +- */ +- size = (xpram_pages * 8) & ~0x3f; +- geo->cylinders = size >> 6; +- geo->heads = 4; +- geo->sectors = 16; +- geo->start = 4; +- return 0; +-} +- +-static const struct block_device_operations xpram_devops = +-{ +- .owner = THIS_MODULE, +- .submit_bio = xpram_submit_bio, +- .getgeo = xpram_getgeo, +-}; +- +-/* +- * Setup xpram_sizes array. +- */ +-static int __init xpram_setup_sizes(unsigned long pages) +-{ +- unsigned long mem_needed; +- unsigned long mem_auto; +- unsigned long long size; +- char *sizes_end; +- int mem_auto_no; +- int i; +- +- /* Check number of devices. */ +- if (devs <= 0 || devs > XPRAM_MAX_DEVS) { +- pr_err("%d is not a valid number of XPRAM devices\n",devs); +- return -EINVAL; +- } +- xpram_devs = devs; +- +- /* +- * Copy sizes array to xpram_sizes and align partition +- * sizes to page boundary. 
+- */ +- mem_needed = 0; +- mem_auto_no = 0; +- for (i = 0; i < xpram_devs; i++) { +- if (sizes[i]) { +- size = simple_strtoull(sizes[i], &sizes_end, 0); +- switch (*sizes_end) { +- case 'g': +- case 'G': +- size <<= 20; +- break; +- case 'm': +- case 'M': +- size <<= 10; +- } +- xpram_sizes[i] = (size + 3) & -4UL; +- } +- if (xpram_sizes[i]) +- mem_needed += xpram_sizes[i]; +- else +- mem_auto_no++; +- } +- +- pr_info(" number of devices (partitions): %d \n", xpram_devs); +- for (i = 0; i < xpram_devs; i++) { +- if (xpram_sizes[i]) +- pr_info(" size of partition %d: %u kB\n", +- i, xpram_sizes[i]); +- else +- pr_info(" size of partition %d to be set " +- "automatically\n",i); +- } +- pr_info(" memory needed (for sized partitions): %lu kB\n", +- mem_needed); +- pr_info(" partitions to be sized automatically: %d\n", +- mem_auto_no); +- +- if (mem_needed > pages * 4) { +- pr_err("Not enough expanded memory available\n"); +- return -EINVAL; +- } +- +- /* +- * partitioning: +- * xpram_sizes[i] != 0; partition i has size xpram_sizes[i] kB +- * else: ; all partitions with zero xpram_sizes[i] +- * partition equally the remaining space +- */ +- if (mem_auto_no) { +- mem_auto = ((pages - mem_needed / 4) / mem_auto_no) * 4; +- pr_info(" automatically determined " +- "partition size: %lu kB\n", mem_auto); +- for (i = 0; i < xpram_devs; i++) +- if (xpram_sizes[i] == 0) +- xpram_sizes[i] = mem_auto; +- } +- return 0; +-} +- +-static int __init xpram_setup_blkdev(void) +-{ +- unsigned long offset; +- int i, rc = -ENOMEM; +- +- for (i = 0; i < xpram_devs; i++) { +- xpram_disks[i] = blk_alloc_disk(NUMA_NO_NODE); +- if (!xpram_disks[i]) +- goto out; +- blk_queue_flag_set(QUEUE_FLAG_NONROT, xpram_disks[i]->queue); +- blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, +- xpram_disks[i]->queue); +- blk_queue_logical_block_size(xpram_disks[i]->queue, 4096); +- } +- +- /* +- * Register xpram major. +- */ +- rc = register_blkdev(XPRAM_MAJOR, XPRAM_NAME); +- if (rc < 0) +- goto out; +- +- /* +- * Setup device structures. +- */ +- offset = 0; +- for (i = 0; i < xpram_devs; i++) { +- struct gendisk *disk = xpram_disks[i]; +- +- xpram_devices[i].size = xpram_sizes[i] / 4; +- xpram_devices[i].offset = offset; +- offset += xpram_devices[i].size; +- disk->major = XPRAM_MAJOR; +- disk->first_minor = i; +- disk->minors = 1; +- disk->fops = &xpram_devops; +- disk->private_data = &xpram_devices[i]; +- sprintf(disk->disk_name, "slram%d", i); +- set_capacity(disk, xpram_sizes[i] << 1); +- add_disk(disk); +- } +- +- return 0; +-out: +- while (i--) +- blk_cleanup_disk(xpram_disks[i]); +- return rc; +-} +- +-/* +- * Finally, the init/exit functions. +- */ +-static void __exit xpram_exit(void) +-{ +- int i; +- for (i = 0; i < xpram_devs; i++) { +- del_gendisk(xpram_disks[i]); +- blk_cleanup_disk(xpram_disks[i]); +- } +- unregister_blkdev(XPRAM_MAJOR, XPRAM_NAME); +-} +- +-static int __init xpram_init(void) +-{ +- int rc; +- +- /* Find out size of expanded memory. 
*/ +- if (xpram_present() != 0) { +- pr_err("No expanded memory available\n"); +- return -ENODEV; +- } +- xpram_pages = xpram_highest_page_index() + 1; +- pr_info(" %u pages expanded memory found (%lu KB).\n", +- xpram_pages, (unsigned long) xpram_pages*4); +- rc = xpram_setup_sizes(xpram_pages); +- if (rc) +- return rc; +- return xpram_setup_blkdev(); +-} +- +-module_init(xpram_init); +-module_exit(xpram_exit); +-- +2.35.3 + diff --git a/patches.suse/s390-uaccess-Add-copy_from-to_user_key-functions b/patches.suse/s390-uaccess-Add-copy_from-to_user_key-functions new file mode 100644 index 0000000..8f0c678 --- /dev/null +++ b/patches.suse/s390-uaccess-Add-copy_from-to_user_key-functions @@ -0,0 +1,239 @@ +From: Janis Schoetterl-Glausch +Date: Fri, 11 Feb 2022 19:22:06 +0100 +Subject: s390/uaccess: Add copy_from/to_user_key functions +Git-commit: 1a82f6ab23659aa01a796d9d444ec9cc63ded26c +Patch-mainline: v5.18-rc1 +References: jsc#PED-579 + +Add copy_from/to_user_key functions, which perform storage key checking. +These functions can be used by KVM for emulating instructions that need +to be key checked. +These functions differ from their non _key counterparts in +include/linux/uaccess.h only in the additional key argument and must be +kept in sync with those. + +Since the existing uaccess implementation on s390 makes use of move +instructions that support having an additional access key supplied, +we can implement raw_copy_from/to_user_key by enhancing the +existing implementation. + +Signed-off-by: Janis Schoetterl-Glausch +Acked-by: Heiko Carstens +Reviewed-by: Christian Borntraeger +Acked-by: Janosch Frank +Link: https://lore.kernel.org/r/20220211182215.2730017-2-scgl@linux.ibm.com +Signed-off-by: Christian Borntraeger +Acked-by: Petr Tesarik +--- + arch/s390/include/asm/uaccess.h | 22 ++++++++++ + arch/s390/lib/uaccess.c | 81 +++++++++++++++++++++++++++++++--------- + 2 files changed, 85 insertions(+), 18 deletions(-) + +--- a/arch/s390/include/asm/uaccess.h ++++ b/arch/s390/include/asm/uaccess.h +@@ -44,6 +44,28 @@ raw_copy_to_user(void __user *to, const + #define INLINE_COPY_TO_USER + #endif + ++unsigned long __must_check ++_copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key); ++ ++static __always_inline unsigned long __must_check ++copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key) ++{ ++ if (likely(check_copy_size(to, n, false))) ++ n = _copy_from_user_key(to, from, n, key); ++ return n; ++} ++ ++unsigned long __must_check ++_copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key); ++ ++static __always_inline unsigned long __must_check ++copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key) ++{ ++ if (likely(check_copy_size(from, n, true))) ++ n = _copy_to_user_key(to, from, n, key); ++ return n; ++} ++ + int __put_user_bad(void) __attribute__((noreturn)); + int __get_user_bad(void) __attribute__((noreturn)); + +--- a/arch/s390/lib/uaccess.c ++++ b/arch/s390/lib/uaccess.c +@@ -59,11 +59,13 @@ static inline int copy_with_mvcos(void) + #endif + + static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, +- unsigned long size) ++ unsigned long size, unsigned long key) + { + unsigned long tmp1, tmp2; + union oac spec = { ++ .oac2.key = key, + .oac2.as = PSW_BITS_AS_SECONDARY, ++ .oac2.k = 1, + .oac2.a = 1, + }; + +@@ -94,19 +96,19 @@ static inline unsigned long copy_from_us + } + + static inline unsigned long 
copy_from_user_mvcp(void *x, const void __user *ptr, +- unsigned long size) ++ unsigned long size, unsigned long key) + { + unsigned long tmp1, tmp2; + + tmp1 = -256UL; + asm volatile( + " sacf 0\n" +- "0: mvcp 0(%0,%2),0(%1),%3\n" ++ "0: mvcp 0(%0,%2),0(%1),%[key]\n" + "7: jz 5f\n" + "1: algr %0,%3\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" +- "2: mvcp 0(%0,%2),0(%1),%3\n" ++ "2: mvcp 0(%0,%2),0(%1),%[key]\n" + "8: jnz 1b\n" + " j 5f\n" + "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ +@@ -115,7 +117,7 @@ static inline unsigned long copy_from_us + " slgr %4,%1\n" + " clgr %0,%4\n" /* copy crosses next page boundary? */ + " jnh 6f\n" +- "4: mvcp 0(%4,%2),0(%1),%3\n" ++ "4: mvcp 0(%4,%2),0(%1),%[key]\n" + "9: slgr %0,%4\n" + " j 6f\n" + "5: slgr %0,%0\n" +@@ -123,24 +125,49 @@ static inline unsigned long copy_from_us + EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) + EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) +- : : "cc", "memory"); ++ : [key] "d" (key << 4) ++ : "cc", "memory"); + return size; + } + +-unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) ++static unsigned long raw_copy_from_user_key(void *to, const void __user *from, ++ unsigned long n, unsigned long key) + { + if (copy_with_mvcos()) +- return copy_from_user_mvcos(to, from, n); +- return copy_from_user_mvcp(to, from, n); ++ return copy_from_user_mvcos(to, from, n, key); ++ return copy_from_user_mvcp(to, from, n, key); ++} ++ ++unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) ++{ ++ return raw_copy_from_user_key(to, from, n, 0); + } + EXPORT_SYMBOL(raw_copy_from_user); + ++unsigned long _copy_from_user_key(void *to, const void __user *from, ++ unsigned long n, unsigned long key) ++{ ++ unsigned long res = n; ++ ++ might_fault(); ++ if (!should_fail_usercopy()) { ++ instrument_copy_from_user(to, from, n); ++ res = raw_copy_from_user_key(to, from, n, key); ++ } ++ if (unlikely(res)) ++ memset(to + (n - res), 0, res); ++ return res; ++} ++EXPORT_SYMBOL(_copy_from_user_key); ++ + static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, +- unsigned long size) ++ unsigned long size, unsigned long key) + { + unsigned long tmp1, tmp2; + union oac spec = { ++ .oac1.key = key, + .oac1.as = PSW_BITS_AS_SECONDARY, ++ .oac1.k = 1, + .oac1.a = 1, + }; + +@@ -171,19 +198,19 @@ static inline unsigned long copy_to_user + } + + static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, +- unsigned long size) ++ unsigned long size, unsigned long key) + { + unsigned long tmp1, tmp2; + + tmp1 = -256UL; + asm volatile( + " sacf 0\n" +- "0: mvcs 0(%0,%1),0(%2),%3\n" ++ "0: mvcs 0(%0,%1),0(%2),%[key]\n" + "7: jz 5f\n" + "1: algr %0,%3\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" +- "2: mvcs 0(%0,%1),0(%2),%3\n" ++ "2: mvcs 0(%0,%1),0(%2),%[key]\n" + "8: jnz 1b\n" + " j 5f\n" + "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ +@@ -192,7 +219,7 @@ static inline unsigned long copy_to_user + " slgr %4,%1\n" + " clgr %0,%4\n" /* copy crosses next page boundary? 
*/ + " jnh 6f\n" +- "4: mvcs 0(%4,%1),0(%2),%3\n" ++ "4: mvcs 0(%4,%1),0(%2),%[key]\n" + "9: slgr %0,%4\n" + " j 6f\n" + "5: slgr %0,%0\n" +@@ -200,18 +227,36 @@ static inline unsigned long copy_to_user + EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) + EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) +- : : "cc", "memory"); ++ : [key] "d" (key << 4) ++ : "cc", "memory"); + return size; + } + +-unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) ++static unsigned long raw_copy_to_user_key(void __user *to, const void *from, ++ unsigned long n, unsigned long key) + { + if (copy_with_mvcos()) +- return copy_to_user_mvcos(to, from, n); +- return copy_to_user_mvcs(to, from, n); ++ return copy_to_user_mvcos(to, from, n, key); ++ return copy_to_user_mvcs(to, from, n, key); ++} ++ ++unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) ++{ ++ return raw_copy_to_user_key(to, from, n, 0); + } + EXPORT_SYMBOL(raw_copy_to_user); + ++unsigned long _copy_to_user_key(void __user *to, const void *from, ++ unsigned long n, unsigned long key) ++{ ++ might_fault(); ++ if (should_fail_usercopy()) ++ return n; ++ instrument_copy_to_user(to, from, n); ++ return raw_copy_to_user_key(to, from, n, key); ++} ++EXPORT_SYMBOL(_copy_to_user_key); ++ + static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, + unsigned long size) + { diff --git a/patches.suse/s390-uaccess-add-missing-EX_TABLE-entries-to-__clear_user b/patches.suse/s390-uaccess-add-missing-EX_TABLE-entries-to-__clear_user index 06e05f0..f4a61e0 100644 --- a/patches.suse/s390-uaccess-add-missing-EX_TABLE-entries-to-__clear_user +++ b/patches.suse/s390-uaccess-add-missing-EX_TABLE-entries-to-__clear_user @@ -70,7 +70,7 @@ Acked-by: Petr Tesarik return size; @@ -275,7 +277,7 @@ static inline unsigned long clear_user_m asm volatile( - " llilh 0,%[spec]\n" + " lr 0,%[spec]\n" "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n" - " jz 4f\n" + "6: jz 4f\n" @@ -89,7 +89,7 @@ Acked-by: Petr Tesarik - EX_TABLE(0b,2b) EX_TABLE(3b,5b) + EX_TABLE(0b,2b) EX_TABLE(6b,2b) EX_TABLE(3b,5b) EX_TABLE(7b,5b) : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2) - : "a" (empty_zero_page), [spec] "K" (0x81UL) + : "a" (empty_zero_page), [spec] "d" (spec.val) : "cc", "memory", "0"); @@ -319,13 +321,14 @@ static inline unsigned long clear_user_x " slgr %0,%2\n" diff --git a/patches.suse/s390-uaccess-fix-compile-error b/patches.suse/s390-uaccess-fix-compile-error new file mode 100644 index 0000000..982e7ec --- /dev/null +++ b/patches.suse/s390-uaccess-fix-compile-error @@ -0,0 +1,45 @@ +From: Heiko Carstens +Date: Sat, 22 Jan 2022 10:24:31 +0100 +Subject: s390/uaccess: fix compile error +Git-commit: 3d787b392d169d4a2e3aee6ac6dfd6ec39722cf2 +Patch-mainline: v5.17-rc2 +References: jsc#PED-579 + +Compiling with e.g MARCH=z900 results in compile errors: + + arch/s390/lib/uaccess.c: In function 'copy_from_user_mvcos': +>> arch/s390/lib/uaccess.c:65:15: error: variable 'spec' has initializer but incomplete type + 65 | union oac spec = { + +Therefore make definition of union oac visible for all MARCHs. 
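+
+For reference, the failure reduces to defining and initializing a
+variable whose type is declared but not defined at that point in the
+translation unit (a minimal sketch, independent of the kernel sources):
+
+  union oac;                    /* declaration only, no definition */
+
+  unsigned long demo(void)
+  {
+          /* error: variable 'spec' has initializer but incomplete type */
+          union oac spec = { };
+          return 0;
+  }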
+ +Reported-by: kernel test robot +Cc: Nico Boehr +Cc: Janis Schoetterl-Glausch +Fixes: 012a224e1fa3 ("s390/uaccess: introduce bit field for OAC specifier") +Signed-off-by: Heiko Carstens +Acked-by: Petr Tesarik +--- + arch/s390/include/asm/uaccess.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/s390/include/asm/uaccess.h ++++ b/arch/s390/include/asm/uaccess.h +@@ -47,8 +47,6 @@ raw_copy_to_user(void __user *to, const + int __put_user_bad(void) __attribute__((noreturn)); + int __get_user_bad(void) __attribute__((noreturn)); + +-#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES +- + union oac { + unsigned int val; + struct { +@@ -71,6 +69,8 @@ union oac { + }; + }; + ++#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES ++ + #define __put_get_user_asm(to, from, size, oac_spec) \ + ({ \ + int __rc; \ diff --git a/patches.suse/s390-uaccess-introduce-bit-field-for-OAC-specifier b/patches.suse/s390-uaccess-introduce-bit-field-for-OAC-specifier new file mode 100644 index 0000000..1df969d --- /dev/null +++ b/patches.suse/s390-uaccess-introduce-bit-field-for-OAC-specifier @@ -0,0 +1,269 @@ +From: Nico Boehr +Date: Tue, 11 Jan 2022 11:00:03 +0100 +Subject: s390/uaccess: introduce bit field for OAC specifier +Git-commit: 012a224e1fa31fc256aab921f691598e03db6018 +Patch-mainline: v5.17-rc1 +References: jsc#PED-579 + +Previously, we've used magic values to specify the OAC +(operand-access control) for mvcos. + +Instead we introduce a bit field for it. + +When using a bit field, we cannot use an immediate value with K +constraint anymore, since GCC older than 10 doesn't recognize +the bit field union as a compile time constant. +To make things work with older compilers, +load the OAC value through a register. + +Bloat-o-meter reports a slight increase in kernel size with this change: +Total: Before=15692135, After=15693015, chg +0.01% + +Signed-off-by: Nico Boehr +Co-developed-by: Janis Schoetterl-Glausch +Signed-off-by: Janis Schoetterl-Glausch +Link: https://lore.kernel.org/r/20220111100003.743116-1-scgl@linux.ibm.com +Cc: Alexander Gordeev +Cc: Christian Borntraeger +Cc: Vasily Gorbik +Cc: Sven Schnelle +Signed-off-by: Heiko Carstens +Acked-by: Petr Tesarik +--- + arch/s390/include/asm/uaccess.h | 120 +++++++++++++++++++++++++--------------- + arch/s390/lib/uaccess.c | 24 ++++++-- + 2 files changed, 95 insertions(+), 49 deletions(-) + +--- a/arch/s390/include/asm/uaccess.h ++++ b/arch/s390/include/asm/uaccess.h +@@ -49,51 +49,85 @@ int __get_user_bad(void) __attribute__(( + + #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + +-#define __put_get_user_asm(to, from, size, insn) \ +-({ \ +- int __rc; \ +- \ +- asm volatile( \ +- insn " 0,%[spec]\n" \ +- "0: mvcos %[_to],%[_from],%[_size]\n" \ +- "1: xr %[rc],%[rc]\n" \ +- "2:\n" \ +- ".pushsection .fixup, \"ax\"\n" \ +- "3: lhi %[rc],%[retval]\n" \ +- " jg 2b\n" \ +- ".popsection\n" \ +- EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ +- : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \ +- : [_size] "d" (size), [_from] "Q" (*(from)), \ +- [retval] "K" (-EFAULT), [spec] "K" (0x81UL) \ +- : "cc", "0"); \ +- __rc; \ ++union oac { ++ unsigned int val; ++ struct { ++ struct { ++ unsigned short key : 4; ++ unsigned short : 4; ++ unsigned short as : 2; ++ unsigned short : 4; ++ unsigned short k : 1; ++ unsigned short a : 1; ++ } oac1; ++ struct { ++ unsigned short key : 4; ++ unsigned short : 4; ++ unsigned short as : 2; ++ unsigned short : 4; ++ unsigned short k : 1; ++ unsigned short a : 1; ++ } oac2; ++ }; ++}; ++ ++#define __put_get_user_asm(to, from, size, oac_spec) \ ++({ \ ++ int 
__rc; \ ++ \ ++ asm volatile( \ ++ " lr 0,%[spec]\n" \ ++ "0: mvcos %[_to],%[_from],%[_size]\n" \ ++ "1: xr %[rc],%[rc]\n" \ ++ "2:\n" \ ++ ".pushsection .fixup, \"ax\"\n" \ ++ "3: lhi %[rc],%[retval]\n" \ ++ " jg 2b\n" \ ++ ".popsection\n" \ ++ EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ ++ : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \ ++ : [_size] "d" (size), [_from] "Q" (*(from)), \ ++ [retval] "K" (-EFAULT), [spec] "d" (oac_spec.val) \ ++ : "cc", "0"); \ ++ __rc; \ + }) + ++#define __put_user_asm(to, from, size) \ ++ __put_get_user_asm(to, from, size, ((union oac) { \ ++ .oac1.as = PSW_BITS_AS_SECONDARY, \ ++ .oac1.a = 1 \ ++ })) ++ ++#define __get_user_asm(to, from, size) \ ++ __put_get_user_asm(to, from, size, ((union oac) { \ ++ .oac2.as = PSW_BITS_AS_SECONDARY, \ ++ .oac2.a = 1 \ ++ })) \ ++ + static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) + { + int rc; + + switch (size) { + case 1: +- rc = __put_get_user_asm((unsigned char __user *)ptr, +- (unsigned char *)x, +- size, "llilh"); ++ rc = __put_user_asm((unsigned char __user *)ptr, ++ (unsigned char *)x, ++ size); + break; + case 2: +- rc = __put_get_user_asm((unsigned short __user *)ptr, +- (unsigned short *)x, +- size, "llilh"); ++ rc = __put_user_asm((unsigned short __user *)ptr, ++ (unsigned short *)x, ++ size); + break; + case 4: +- rc = __put_get_user_asm((unsigned int __user *)ptr, +- (unsigned int *)x, +- size, "llilh"); ++ rc = __put_user_asm((unsigned int __user *)ptr, ++ (unsigned int *)x, ++ size); + break; + case 8: +- rc = __put_get_user_asm((unsigned long __user *)ptr, +- (unsigned long *)x, +- size, "llilh"); ++ rc = __put_user_asm((unsigned long __user *)ptr, ++ (unsigned long *)x, ++ size); + break; + default: + __put_user_bad(); +@@ -108,24 +142,24 @@ static __always_inline int __get_user_fn + + switch (size) { + case 1: +- rc = __put_get_user_asm((unsigned char *)x, +- (unsigned char __user *)ptr, +- size, "lghi"); ++ rc = __get_user_asm((unsigned char *)x, ++ (unsigned char __user *)ptr, ++ size); + break; + case 2: +- rc = __put_get_user_asm((unsigned short *)x, +- (unsigned short __user *)ptr, +- size, "lghi"); ++ rc = __get_user_asm((unsigned short *)x, ++ (unsigned short __user *)ptr, ++ size); + break; + case 4: +- rc = __put_get_user_asm((unsigned int *)x, +- (unsigned int __user *)ptr, +- size, "lghi"); ++ rc = __get_user_asm((unsigned int *)x, ++ (unsigned int __user *)ptr, ++ size); + break; + case 8: +- rc = __put_get_user_asm((unsigned long *)x, +- (unsigned long __user *)ptr, +- size, "lghi"); ++ rc = __get_user_asm((unsigned long *)x, ++ (unsigned long __user *)ptr, ++ size); + break; + default: + __get_user_bad(); +--- a/arch/s390/lib/uaccess.c ++++ b/arch/s390/lib/uaccess.c +@@ -62,10 +62,14 @@ static inline unsigned long copy_from_us + unsigned long size) + { + unsigned long tmp1, tmp2; ++ union oac spec = { ++ .oac2.as = PSW_BITS_AS_SECONDARY, ++ .oac2.a = 1, ++ }; + + tmp1 = -4096UL; + asm volatile( +- " lghi 0,%[spec]\n" ++ " lr 0,%[spec]\n" + "0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n" + "6: jz 4f\n" + "1: algr %0,%3\n" +@@ -84,7 +88,7 @@ static inline unsigned long copy_from_us + "5:\n" + EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b) + : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) +- : [spec] "K" (0x81UL) ++ : [spec] "d" (spec.val) + : "cc", "memory", "0"); + return size; + } +@@ -135,10 +139,14 @@ static inline unsigned long copy_to_user + unsigned long size) + { + unsigned long tmp1, tmp2; ++ union oac spec = { ++ .oac1.as = 
PSW_BITS_AS_SECONDARY,
++ .oac1.a = 1,
++ };
+
+ tmp1 = -4096UL;
+ asm volatile(
+- " llilh 0,%[spec]\n"
++ " lr 0,%[spec]\n"
+ "0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
+ "6: jz 4f\n"
+ "1: algr %0,%3\n"
+@@ -157,7 +165,7 @@ static inline unsigned long copy_to_user
+ "5:\n"
+ EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
+ : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+- : [spec] "K" (0x81UL)
++ : [spec] "d" (spec.val)
+ : "cc", "memory", "0");
+ return size;
+ }
+@@ -270,10 +278,14 @@ EXPORT_SYMBOL(raw_copy_in_user);
+ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
+ {
+ unsigned long tmp1, tmp2;
++ union oac spec = {
++ .oac1.as = PSW_BITS_AS_SECONDARY,
++ .oac1.a = 1,
++ };
+
+ tmp1 = -4096UL;
+ asm volatile(
+- " llilh 0,%[spec]\n"
++ " lr 0,%[spec]\n"
+ "0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
+ " jz 4f\n"
+ "1: algr %0,%2\n"
+@@ -291,7 +303,7 @@ static inline unsigned long clear_user_m
+ "5:\n"
+ EX_TABLE(0b,2b) EX_TABLE(3b,5b)
+ : "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
+- : "a" (empty_zero_page), [spec] "K" (0x81UL)
++ : "a" (empty_zero_page), [spec] "d" (spec.val)
+ : "cc", "memory", "0");
+ return size;
+ }
diff --git a/patches.suse/samples-bpf-Clean-up-samples-bpf-build-failes.patch b/patches.suse/samples-bpf-Clean-up-samples-bpf-build-failes.patch
new file mode 100644
index 0000000..42fb040
--- /dev/null
+++ b/patches.suse/samples-bpf-Clean-up-samples-bpf-build-failes.patch
@@ -0,0 +1,102 @@
+From: Andrii Nakryiko
+Date: Wed, 1 Dec 2021 15:28:22 -0800
+Subject: samples/bpf: Clean up samples/bpf build fails
+Patch-mainline: v5.17-rc1
+Git-commit: 527024f7aeb683ce7ef49b07ef7ce9ecf015288d
+References: jsc#PED-1368
+
+Remove the xdp_sample_user.o rule redefinition which generates a
+Makefile warning and instead override TPROGS_CFLAGS. This seems to work
+fine when building inside selftests/bpf.
+
+That was one big head-scratcher before I found that the generic
+Makefile.target hid this surprising specialization for xdp_sample_user.o.
+
+The main change is to use the actual locally installed libbpf headers.
+
+Also drop the printk macro re-definition (not even used!).
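+
+For illustration, the override used instead is a plain GNU Make
+target-specific variable, which changes the flags for that one object
+only (a generic sketch; the variable name here is made up):
+
+  DEMO_CFLAGS := -Wall -O2 -I$(LIBBPF_INCLUDE)
+
+  # only builds of this object see the overridden compile flags
+  $(obj)/xdp_sample_user.o: TPROGS_CFLAGS = $(DEMO_CFLAGS)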
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211201232824.3166325-8-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + samples/bpf/Makefile | 13 ++++++++++++- + samples/bpf/Makefile.target | 11 ----------- + samples/bpf/hbm_kern.h | 2 -- + samples/bpf/lwt_len_hist_kern.c | 7 ------- + 4 files changed, 12 insertions(+), 21 deletions(-) + +--- a/samples/bpf/Makefile ++++ b/samples/bpf/Makefile +@@ -328,7 +328,7 @@ $(BPF_SAMPLES_PATH)/*.c: verify_target_b + $(src)/*.c: verify_target_bpf $(LIBBPF) + + libbpf_hdrs: $(LIBBPF) +-$(obj)/$(TRACE_HELPERS): | libbpf_hdrs ++$(obj)/$(TRACE_HELPERS) $(obj)/$(CGROUP_HELPERS) $(obj)/$(XDP_SAMPLE): | libbpf_hdrs + + .PHONY: libbpf_hdrs + +@@ -343,6 +343,17 @@ $(obj)/hbm_out_kern.o: $(src)/hbm.h $(sr + $(obj)/hbm.o: $(src)/hbm.h + $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h + ++# Override includes for xdp_sample_user.o because $(srctree)/usr/include in ++# TPROGS_CFLAGS causes conflicts ++XDP_SAMPLE_CFLAGS += -Wall -O2 -lm \ ++ -I$(src)/../../tools/include \ ++ -I$(src)/../../tools/include/uapi \ ++ -I$(LIBBPF_INCLUDE) \ ++ -I$(src)/../../tools/testing/selftests/bpf ++ ++$(obj)/$(XDP_SAMPLE): TPROGS_CFLAGS = $(XDP_SAMPLE_CFLAGS) ++$(obj)/$(XDP_SAMPLE): $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h ++ + -include $(BPF_SAMPLES_PATH)/Makefile.target + + VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \ +--- a/samples/bpf/Makefile.target ++++ b/samples/bpf/Makefile.target +@@ -73,14 +73,3 @@ quiet_cmd_tprog-cobjs = CC $@ + cmd_tprog-cobjs = $(CC) $(tprogc_flags) -c -o $@ $< + $(tprog-cobjs): $(obj)/%.o: $(src)/%.c FORCE + $(call if_changed_dep,tprog-cobjs) +- +-# Override includes for xdp_sample_user.o because $(srctree)/usr/include in +-# TPROGS_CFLAGS causes conflicts +-XDP_SAMPLE_CFLAGS += -Wall -O2 -lm \ +- -I./tools/include \ +- -I./tools/include/uapi \ +- -I./tools/lib \ +- -I./tools/testing/selftests/bpf +-$(obj)/xdp_sample_user.o: $(src)/xdp_sample_user.c \ +- $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h +- $(CC) $(XDP_SAMPLE_CFLAGS) -c -o $@ $< +--- a/samples/bpf/hbm_kern.h ++++ b/samples/bpf/hbm_kern.h +@@ -9,8 +9,6 @@ + * Include file for sample Host Bandwidth Manager (HBM) BPF programs + */ + #define KBUILD_MODNAME "foo" +-#include +-#include + #include + #include + #include +--- a/samples/bpf/lwt_len_hist_kern.c ++++ b/samples/bpf/lwt_len_hist_kern.c +@@ -16,13 +16,6 @@ + #include + #include + +-# define printk(fmt, ...) 
\ +- ({ \ +- char ____fmt[] = fmt; \ +- bpf_trace_printk(____fmt, sizeof(____fmt), \ +- ##__VA_ARGS__); \ +- }) +- + struct bpf_elf_map { + __u32 type; + __u32 size_key; diff --git a/patches.suse/samples-bpf-Fix-conflicting-types-in-fds_example.patch b/patches.suse/samples-bpf-Fix-conflicting-types-in-fds_example.patch new file mode 100644 index 0000000..b5863a1 --- /dev/null +++ b/patches.suse/samples-bpf-Fix-conflicting-types-in-fds_example.patch @@ -0,0 +1,68 @@ +From: Alexander Lobakin +Date: Wed, 1 Dec 2021 17:49:31 +0100 +Subject: samples: bpf: Fix conflicting types in fds_example +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 +Git-commit: 64b5b97b8cfff64409fcc234ae3151bc8de0c4d6 +References: jsc#PED-1368 + +Fix the following samples/bpf build error appeared after the +introduction of bpf_map_create() in libbpf: + + CC samples/bpf/fds_example.o +samples/bpf/fds_example.c:49:12: error: static declaration of 'bpf_map_create' follows non-static declaration +static int bpf_map_create(void) + ^ +samples/bpf/libbpf/include/bpf/bpf.h:55:16: note: previous declaration is here +LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, + ^ +samples/bpf/fds_example.c:82:23: error: too few arguments to function call, expected 6, have 0 + fd = bpf_map_create(); + ~~~~~~~~~~~~~~ ^ +samples/bpf/libbpf/include/bpf/bpf.h:55:16: note: 'bpf_map_create' declared here +LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, + ^ +2 errors generated. + +fds_example by accident has a static function with the same name. +It's not worth it to separate a single call into its own function, +so just embed it. + +Fixes: 992c4225419a ("libbpf: Unify low-level map creation APIs w/ new bpf_map_create()") +Signed-off-by: Alexander Lobakin +Signed-off-by: Andrii Nakryiko +Reviewed-by: Maciej Fijalkowski +Acked-by: Toke Høiland-Jørgensen +Link: https://lore.kernel.org/bpf/20211201164931.47357-1-alexandr.lobakin@intel.com +Acked-by: Shung-Hsi Yu +--- + samples/bpf/fds_example.c | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +--- a/samples/bpf/fds_example.c ++++ b/samples/bpf/fds_example.c +@@ -46,12 +46,6 @@ static void usage(void) + printf(" -h Display this help.\n"); + } + +-static int bpf_map_create(void) +-{ +- return bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t), +- sizeof(uint32_t), 1024, 0); +-} +- + static int bpf_prog_create(const char *object) + { + static struct bpf_insn insns[] = { +@@ -79,7 +73,8 @@ static int bpf_do_map(const char *file, + int fd, ret; + + if (flags & BPF_F_PIN) { +- fd = bpf_map_create(); ++ fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t), ++ sizeof(uint32_t), 1024, 0); + printf("bpf: map fd:%d (%s)\n", fd, strerror(errno)); + assert(fd > 0); + diff --git a/patches.suse/samples-bpf-Fix-unknown-warning-group-build-warning-.patch b/patches.suse/samples-bpf-Fix-unknown-warning-group-build-warning-.patch new file mode 100644 index 0000000..3c874ca --- /dev/null +++ b/patches.suse/samples-bpf-Fix-unknown-warning-group-build-warning-.patch @@ -0,0 +1,42 @@ +From: Alexander Lobakin +Date: Fri, 3 Dec 2021 20:50:04 +0100 +Subject: samples: bpf: Fix 'unknown warning group' build warning on Clang +Patch-mainline: v5.17-rc1 +Git-commit: 6f670d06e47c774bc065aaa84a527a4838f34bd8 +References: jsc#PED-1368 + +Clang doesn't have 'stringop-truncation' group like GCC does, and +complains about it when building samples which use xdp_sample_user +infra: + + samples/bpf/xdp_sample_user.h:48:32: warning: 
unknown warning group '-Wstringop-truncation', ignored [-Wunknown-warning-option]
+ #pragma GCC diagnostic ignored "-Wstringop-truncation"
+ ^
+[ repeat ]
+
+Those are harmless, but avoidable by guarding the pragma with an ifdef.
+I could guard push/pop as well, but this would require one more piece
+of ifdef cruft around a single line, which I don't think is reasonable.
+
+Fixes: 156f886cf697 ("samples: bpf: Add basic infrastructure for XDP samples")
+Signed-off-by: Alexander Lobakin
+Signed-off-by: Andrii Nakryiko
+Acked-by: Kumar Kartikeya Dwivedi
+Link: https://lore.kernel.org/bpf/20211203195004.5803-3-alexandr.lobakin@intel.com
+Acked-by: Shung-Hsi Yu
+---
+ samples/bpf/xdp_sample_user.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/samples/bpf/xdp_sample_user.h
++++ b/samples/bpf/xdp_sample_user.h
+@@ -45,7 +45,9 @@ const char *get_driver_name(int ifindex)
+ int get_mac_addr(int ifindex, void *mac_addr);
+
+ #pragma GCC diagnostic push
++#ifndef __clang__
+ #pragma GCC diagnostic ignored "-Wstringop-truncation"
++#endif
+ __attribute__((unused))
+ static inline char *safe_strncpy(char *dst, const char *src, size_t size)
+ {
diff --git a/patches.suse/samples-bpf-Fix-xdp_sample_user.o-linking-with-Clang.patch b/patches.suse/samples-bpf-Fix-xdp_sample_user.o-linking-with-Clang.patch
new file mode 100644
index 0000000..a77f035
--- /dev/null
+++ b/patches.suse/samples-bpf-Fix-xdp_sample_user.o-linking-with-Clang.patch
@@ -0,0 +1,65 @@
+From: Alexander Lobakin
+Date: Fri, 3 Dec 2021 20:50:03 +0100
+Subject: samples: bpf: Fix xdp_sample_user.o linking with Clang
+Patch-mainline: v5.17-rc1
+Git-commit: e64fbcaa7a666f16329b1c67af15ea501bc84586
+References: jsc#PED-1368
+
+Clang (13) doesn't get the jokes about specifying libraries to link in
+ccflags of individual .o objects:
+
+clang-13: warning: -lm: 'linker' input unused [-Wunused-command-line-argument]
+[ ... ]
+ LD samples/bpf/xdp_redirect_cpu
+ LD samples/bpf/xdp_redirect_map_multi
+ LD samples/bpf/xdp_redirect_map
+ LD samples/bpf/xdp_redirect
+ LD samples/bpf/xdp_monitor
+/usr/bin/ld: samples/bpf/xdp_sample_user.o: in function `sample_summary_print':
+xdp_sample_user.c:(.text+0x84c): undefined reference to `floor'
+/usr/bin/ld: xdp_sample_user.c:(.text+0x870): undefined reference to `ceil'
+/usr/bin/ld: xdp_sample_user.c:(.text+0x8cf): undefined reference to `floor'
+/usr/bin/ld: xdp_sample_user.c:(.text+0x8f3): undefined reference to `ceil'
+[ more ]
+
+Specify '-lm' in the ldflags for all xdp_sample_user.o users in the main
+Makefile and remove it from the ccflags of ^ in Makefile.target -- just
+like it's done for all other samples. This works with all compilers.
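+
+The rule of thumb behind the move (illustrative commands, not taken
+from the log above): a library like -lm is consumed only at link time,
+so it has no effect, and Clang warns, when passed while merely
+compiling an object:
+
+  cc -O2 -lm -c -o xdp_sample_user.o xdp_sample_user.c   # -lm unused
+  cc -o xdp_monitor xdp_monitor.o xdp_sample_user.o -lm  # correct place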
+ +Fixes: 6e1051a54e31 ("samples: bpf: Convert xdp_monitor to XDP samples helper") +Fixes: b926c55d856c ("samples: bpf: Convert xdp_redirect to XDP samples helper") +Fixes: e531a220cc59 ("samples: bpf: Convert xdp_redirect_cpu to XDP samples helper") +Fixes: bbe65865aa05 ("samples: bpf: Convert xdp_redirect_map to XDP samples helper") +Fixes: 594a116b2aa1 ("samples: bpf: Convert xdp_redirect_map_multi to XDP samples helper") +Signed-off-by: Alexander Lobakin +Signed-off-by: Andrii Nakryiko +Acked-by: Kumar Kartikeya Dwivedi +Link: https://lore.kernel.org/bpf/20211203195004.5803-2-alexandr.lobakin@intel.com +Acked-by: Shung-Hsi Yu +--- + samples/bpf/Makefile | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/samples/bpf/Makefile ++++ b/samples/bpf/Makefile +@@ -215,6 +215,11 @@ TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib + endif + + TPROGS_LDLIBS += $(LIBBPF) -lelf -lz ++TPROGLDLIBS_xdp_monitor += -lm ++TPROGLDLIBS_xdp_redirect += -lm ++TPROGLDLIBS_xdp_redirect_cpu += -lm ++TPROGLDLIBS_xdp_redirect_map += -lm ++TPROGLDLIBS_xdp_redirect_map_multi += -lm + TPROGLDLIBS_tracex4 += -lrt + TPROGLDLIBS_trace_output += -lrt + TPROGLDLIBS_map_perf_test += -lrt +@@ -345,7 +350,7 @@ $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(sr + + # Override includes for xdp_sample_user.o because $(srctree)/usr/include in + # TPROGS_CFLAGS causes conflicts +-XDP_SAMPLE_CFLAGS += -Wall -O2 -lm \ ++XDP_SAMPLE_CFLAGS += -Wall -O2 \ + -I$(src)/../../tools/include \ + -I$(src)/../../tools/include/uapi \ + -I$(LIBBPF_INCLUDE) \ diff --git a/patches.suse/samples-bpf-Get-rid-of-deprecated-libbpf-API-uses.patch b/patches.suse/samples-bpf-Get-rid-of-deprecated-libbpf-API-uses.patch new file mode 100644 index 0000000..5e1a07b --- /dev/null +++ b/patches.suse/samples-bpf-Get-rid-of-deprecated-libbpf-API-uses.patch @@ -0,0 +1,438 @@ +From: Andrii Nakryiko +Date: Wed, 1 Dec 2021 15:28:23 -0800 +Subject: samples/bpf: Get rid of deprecated libbpf API uses +Patch-mainline: v5.17-rc1 +Git-commit: c58f9815ba9735752d3735efb915e8878604684b +References: jsc#PED-1368 + +Replace deprecated APIs with new ones. Also mute source code using +deprecated AF_XDP (xsk.h). Figuring out what to do with all the AF_XDP +stuff is a separate problem that should be solved with its own set of +changes. 
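+
+The conversions below all follow the same shape, from the deprecated
+positional helpers to the opts-based API (a hedged sketch; exact sizes
+and flags vary per call site):
+
+  /* before (deprecated) */
+  fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t),
+                      sizeof(uint32_t), 1024, 0);
+
+  /* after */
+  fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(uint32_t),
+                      sizeof(uint32_t), 1024, NULL);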
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211201232824.3166325-9-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + samples/bpf/cookie_uid_helper_example.c | 14 +++++++++----- + samples/bpf/fds_example.c | 24 +++++++++++++++--------- + samples/bpf/map_perf_test_user.c | 15 +++++++++------ + samples/bpf/sock_example.c | 12 ++++++++---- + samples/bpf/sockex1_user.c | 15 ++++++++++++--- + samples/bpf/sockex2_user.c | 14 +++++++++++--- + samples/bpf/test_cgrp2_array_pin.c | 4 ++-- + samples/bpf/test_cgrp2_attach.c | 13 ++++++++----- + samples/bpf/test_cgrp2_sock.c | 8 ++++++-- + samples/bpf/test_lru_dist.c | 11 +++++++---- + samples/bpf/trace_output_user.c | 4 +--- + samples/bpf/xdp_sample_pkts_user.c | 22 +++++++++++----------- + samples/bpf/xdpsock_ctrl_proc.c | 3 +++ + samples/bpf/xdpsock_user.c | 3 +++ + samples/bpf/xsk_fwd.c | 3 +++ + 15 files changed, 108 insertions(+), 57 deletions(-) + +--- a/samples/bpf/cookie_uid_helper_example.c ++++ b/samples/bpf/cookie_uid_helper_example.c +@@ -67,8 +67,8 @@ static bool test_finish; + + static void maps_create(void) + { +- map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(uint32_t), +- sizeof(struct stats), 100, 0); ++ map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(uint32_t), ++ sizeof(struct stats), 100, NULL); + if (map_fd < 0) + error(1, errno, "map create failed!\n"); + } +@@ -157,9 +157,13 @@ static void prog_load(void) + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }; +- prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, +- ARRAY_SIZE(prog), "GPL", 0, +- log_buf, sizeof(log_buf)); ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, ++ .log_buf = log_buf, ++ .log_size = sizeof(log_buf), ++ ); ++ ++ prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", ++ prog, ARRAY_SIZE(prog), &opts); + if (prog_fd < 0) + error(1, errno, "failed to load prog\n%s\n", log_buf); + } +--- a/samples/bpf/fds_example.c ++++ b/samples/bpf/fds_example.c +@@ -54,16 +54,22 @@ static int bpf_prog_create(const char *o + }; + size_t insns_cnt = sizeof(insns) / sizeof(struct bpf_insn); + struct bpf_object *obj; +- int prog_fd; ++ int err; + + if (object) { +- assert(!bpf_prog_load(object, BPF_PROG_TYPE_UNSPEC, +- &obj, &prog_fd)); +- return prog_fd; ++ obj = bpf_object__open_file(object, NULL); ++ assert(!libbpf_get_error(obj)); ++ err = bpf_object__load(obj); ++ assert(!err); ++ return bpf_program__fd(bpf_object__next_program(obj, NULL)); + } else { +- return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, +- insns, insns_cnt, "GPL", 0, +- bpf_log_buf, BPF_LOG_BUF_SIZE); ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, ++ .log_buf = bpf_log_buf, ++ .log_size = BPF_LOG_BUF_SIZE, ++ ); ++ ++ return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", ++ insns, insns_cnt, &opts); + } + } + +@@ -73,8 +79,8 @@ static int bpf_do_map(const char *file, + int fd, ret; + + if (flags & BPF_F_PIN) { +- fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t), +- sizeof(uint32_t), 1024, 0); ++ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(uint32_t), ++ sizeof(uint32_t), 1024, NULL); + printf("bpf: map fd:%d (%s)\n", fd, strerror(errno)); + assert(fd > 0); + +--- a/samples/bpf/map_perf_test_user.c ++++ b/samples/bpf/map_perf_test_user.c +@@ -134,19 +134,22 @@ static void do_test_lru(enum test_type t + */ + int outer_fd = map_fd[array_of_lru_hashs_idx]; + unsigned int mycpu, mynode; ++ LIBBPF_OPTS(bpf_map_create_opts, opts, ++ .map_flags = BPF_F_NUMA_NODE, ++ ); + + assert(cpu < MAX_NR_CPUS); + 
+ ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL); + assert(!ret); + ++ opts.numa_node = mynode; + inner_lru_map_fds[cpu] = +- bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH, +- test_map_names[INNER_LRU_HASH_PREALLOC], +- sizeof(uint32_t), +- sizeof(long), +- inner_lru_hash_size, 0, +- mynode); ++ bpf_map_create(BPF_MAP_TYPE_LRU_HASH, ++ test_map_names[INNER_LRU_HASH_PREALLOC], ++ sizeof(uint32_t), ++ sizeof(long), ++ inner_lru_hash_size, &opts); + if (inner_lru_map_fds[cpu] == -1) { + printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n", + strerror(errno), errno); +--- a/samples/bpf/sock_example.c ++++ b/samples/bpf/sock_example.c +@@ -37,8 +37,8 @@ static int test_sock(void) + int sock = -1, map_fd, prog_fd, i, key; + long long value = 0, tcp_cnt, udp_cnt, icmp_cnt; + +- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), +- 256, 0); ++ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(key), sizeof(value), ++ 256, NULL); + if (map_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + goto cleanup; +@@ -59,9 +59,13 @@ static int test_sock(void) + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, ++ .log_buf = bpf_log_buf, ++ .log_size = BPF_LOG_BUF_SIZE, ++ ); + +- prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, insns_cnt, +- "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); ++ prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", ++ prog, insns_cnt, &opts); + if (prog_fd < 0) { + printf("failed to load prog '%s'\n", strerror(errno)); + goto cleanup; +--- a/samples/bpf/sockex1_user.c ++++ b/samples/bpf/sockex1_user.c +@@ -11,17 +11,26 @@ + int main(int ac, char **argv) + { + struct bpf_object *obj; ++ struct bpf_program *prog; + int map_fd, prog_fd; + char filename[256]; +- int i, sock; ++ int i, sock, err; + FILE *f; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + +- if (bpf_prog_load(filename, BPF_PROG_TYPE_SOCKET_FILTER, +- &obj, &prog_fd)) ++ obj = bpf_object__open_file(filename, NULL); ++ if (libbpf_get_error(obj)) + return 1; + ++ prog = bpf_object__next_program(obj, NULL); ++ bpf_program__set_type(prog, BPF_PROG_TYPE_SOCKET_FILTER); ++ ++ err = bpf_object__load(obj); ++ if (err) ++ return 1; ++ ++ prog_fd = bpf_program__fd(prog); + map_fd = bpf_object__find_map_fd_by_name(obj, "my_map"); + + sock = open_raw_sock("lo"); +--- a/samples/bpf/sockex2_user.c ++++ b/samples/bpf/sockex2_user.c +@@ -16,18 +16,26 @@ struct pair { + + int main(int ac, char **argv) + { ++ struct bpf_program *prog; + struct bpf_object *obj; + int map_fd, prog_fd; + char filename[256]; +- int i, sock; ++ int i, sock, err; + FILE *f; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); ++ obj = bpf_object__open_file(filename, NULL); ++ if (libbpf_get_error(obj)) ++ return 1; ++ ++ prog = bpf_object__next_program(obj, NULL); ++ bpf_program__set_type(prog, BPF_PROG_TYPE_SOCKET_FILTER); + +- if (bpf_prog_load(filename, BPF_PROG_TYPE_SOCKET_FILTER, +- &obj, &prog_fd)) ++ err = bpf_object__load(obj); ++ if (err) + return 1; + ++ prog_fd = bpf_program__fd(prog); + map_fd = bpf_object__find_map_fd_by_name(obj, "hash_map"); + + sock = open_raw_sock("lo"); +--- a/samples/bpf/test_cgrp2_array_pin.c ++++ b/samples/bpf/test_cgrp2_array_pin.c +@@ -64,9 +64,9 @@ int main(int argc, char **argv) + } + + if (create_array) { +- array_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY, ++ array_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_ARRAY, NULL, + sizeof(uint32_t), 
sizeof(uint32_t), +- 1, 0); ++ 1, NULL); + if (array_fd < 0) { + fprintf(stderr, + "bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n", +--- a/samples/bpf/test_cgrp2_attach.c ++++ b/samples/bpf/test_cgrp2_attach.c +@@ -71,10 +71,13 @@ static int prog_load(int map_fd, int ver + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, ++ .log_buf = bpf_log_buf, ++ .log_size = BPF_LOG_BUF_SIZE, ++ ); + +- return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, +- prog, insns_cnt, "GPL", 0, +- bpf_log_buf, BPF_LOG_BUF_SIZE); ++ return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB, NULL, "GPL", ++ prog, insns_cnt, &opts); + } + + static int usage(const char *argv0) +@@ -90,9 +93,9 @@ static int attach_filter(int cg_fd, int + int prog_fd, map_fd, ret, key; + long long pkt_cnt, byte_cnt; + +- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, ++ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, + sizeof(key), sizeof(byte_cnt), +- 256, 0); ++ 256, NULL); + if (map_fd < 0) { + printf("Failed to create map: '%s'\n", strerror(errno)); + return EXIT_FAILURE; +--- a/samples/bpf/test_cgrp2_sock.c ++++ b/samples/bpf/test_cgrp2_sock.c +@@ -70,6 +70,10 @@ static int prog_load(__u32 idx, __u32 ma + BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, priority)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, priority)), + }; ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, ++ .log_buf = bpf_log_buf, ++ .log_size = BPF_LOG_BUF_SIZE, ++ ); + + struct bpf_insn *prog; + size_t insns_cnt; +@@ -115,8 +119,8 @@ static int prog_load(__u32 idx, __u32 ma + + insns_cnt /= sizeof(struct bpf_insn); + +- ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt, +- "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); ++ ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", ++ prog, insns_cnt, &opts); + + free(prog); + +--- a/samples/bpf/test_lru_dist.c ++++ b/samples/bpf/test_lru_dist.c +@@ -105,10 +105,10 @@ struct pfect_lru { + static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size, + unsigned int nr_possible_elems) + { +- lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, ++ lru->map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, + sizeof(unsigned long long), + sizeof(struct pfect_lru_node *), +- nr_possible_elems, 0); ++ nr_possible_elems, NULL); + assert(lru->map_fd != -1); + + lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node)); +@@ -207,10 +207,13 @@ static unsigned int read_keys(const char + + static int create_map(int map_type, int map_flags, unsigned int size) + { ++ LIBBPF_OPTS(bpf_map_create_opts, opts, ++ .map_flags = map_flags, ++ ); + int map_fd; + +- map_fd = bpf_create_map(map_type, sizeof(unsigned long long), +- sizeof(unsigned long long), size, map_flags); ++ map_fd = bpf_map_create(map_type, NULL, sizeof(unsigned long long), ++ sizeof(unsigned long long), size, &opts); + + if (map_fd == -1) + perror("bpf_create_map"); +--- a/samples/bpf/trace_output_user.c ++++ b/samples/bpf/trace_output_user.c +@@ -43,7 +43,6 @@ static void print_bpf_output(void *ctx, + + int main(int argc, char **argv) + { +- struct perf_buffer_opts pb_opts = {}; + struct bpf_link *link = NULL; + struct bpf_program *prog; + struct perf_buffer *pb; +@@ -84,8 +83,7 @@ int main(int argc, char **argv) + goto cleanup; + } + +- pb_opts.sample_cb = print_bpf_output; +- pb = perf_buffer__new(map_fd, 8, &pb_opts); ++ pb = perf_buffer__new(map_fd, 8, print_bpf_output, NULL, NULL, NULL); + ret = libbpf_get_error(pb); + if (ret) { + printf("failed to 
setup perf_buffer: %d\n", ret); +--- a/samples/bpf/xdp_sample_pkts_user.c ++++ b/samples/bpf/xdp_sample_pkts_user.c +@@ -110,12 +110,9 @@ static void usage(const char *prog) + + int main(int argc, char **argv) + { +- struct bpf_prog_load_attr prog_load_attr = { +- .prog_type = BPF_PROG_TYPE_XDP, +- }; +- struct perf_buffer_opts pb_opts = {}; + const char *optstr = "FS"; + int prog_fd, map_fd, opt; ++ struct bpf_program *prog; + struct bpf_object *obj; + struct bpf_map *map; + char filename[256]; +@@ -144,15 +141,19 @@ int main(int argc, char **argv) + } + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); +- prog_load_attr.file = filename; + +- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) ++ obj = bpf_object__open_file(filename, NULL); ++ if (libbpf_get_error(obj)) + return 1; + +- if (!prog_fd) { +- printf("bpf_prog_load_xattr: %s\n", strerror(errno)); ++ prog = bpf_object__next_program(obj, NULL); ++ bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); ++ ++ err = bpf_object__load(obj); ++ if (err) + return 1; +- } ++ ++ prog_fd = bpf_program__fd(prog); + + map = bpf_object__next_map(obj, NULL); + if (!map) { +@@ -181,8 +182,7 @@ int main(int argc, char **argv) + return 1; + } + +- pb_opts.sample_cb = print_bpf_output; +- pb = perf_buffer__new(map_fd, 8, &pb_opts); ++ pb = perf_buffer__new(map_fd, 8, print_bpf_output, NULL, NULL, NULL); + err = libbpf_get_error(pb); + if (err) { + perror("perf_buffer setup failed"); +--- a/samples/bpf/xdpsock_ctrl_proc.c ++++ b/samples/bpf/xdpsock_ctrl_proc.c +@@ -15,6 +15,9 @@ + #include + #include "xdpsock.h" + ++/* libbpf APIs for AF_XDP are deprecated starting from v0.7 */ ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ + static const char *opt_if = ""; + + static struct option long_options[] = { +--- a/samples/bpf/xdpsock_user.c ++++ b/samples/bpf/xdpsock_user.c +@@ -36,6 +36,9 @@ + #include + #include "xdpsock.h" + ++/* libbpf APIs for AF_XDP are deprecated starting from v0.7 */ ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ + #ifndef SOL_XDP + #define SOL_XDP 283 + #endif +--- a/samples/bpf/xsk_fwd.c ++++ b/samples/bpf/xsk_fwd.c +@@ -27,6 +27,9 @@ + #include + #include + ++/* libbpf APIs for AF_XDP are deprecated starting from v0.7 */ ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ + #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + + typedef __u64 u64; diff --git a/patches.suse/samples-bpf-Remove-unneeded-variable.patch b/patches.suse/samples-bpf-Remove-unneeded-variable.patch new file mode 100644 index 0000000..420b730 --- /dev/null +++ b/patches.suse/samples-bpf-Remove-unneeded-variable.patch @@ -0,0 +1,38 @@ +From: Minghao Chi +Date: Thu, 9 Dec 2021 08:00:51 +0000 +Subject: samples/bpf: Remove unneeded variable +Patch-mainline: v5.17-rc1 +Git-commit: ac55b3f00c323cf09d59a191e14bcf39b691078c +References: jsc#PED-1368 + +Return value directly instead of taking this in another redundant variable. 
+ +Reported-by: Zeal Robot +Signed-off-by: Minghao Chi +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211209080051.421844-1-chi.minghao@zte.com.cn +Acked-by: Shung-Hsi Yu +--- + samples/bpf/xdp_redirect_cpu.bpf.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/samples/bpf/xdp_redirect_cpu.bpf.c ++++ b/samples/bpf/xdp_redirect_cpu.bpf.c +@@ -100,7 +100,6 @@ u16 get_dest_port_ipv4_udp(struct xdp_md + void *data = (void *)(long)ctx->data; + struct iphdr *iph = data + nh_off; + struct udphdr *udph; +- u16 dport; + + if (iph + 1 > data_end) + return 0; +@@ -111,8 +110,7 @@ u16 get_dest_port_ipv4_udp(struct xdp_md + if (udph + 1 > data_end) + return 0; + +- dport = bpf_ntohs(udph->dest); +- return dport; ++ return bpf_ntohs(udph->dest); + } + + static __always_inline diff --git a/patches.suse/samples-bpf-Stop-using-bpf_object__find_program_by_t.patch b/patches.suse/samples-bpf-Stop-using-bpf_object__find_program_by_t.patch new file mode 100644 index 0000000..4d027b8 --- /dev/null +++ b/patches.suse/samples-bpf-Stop-using-bpf_object__find_program_by_t.patch @@ -0,0 +1,75 @@ +From: Kui-Feng Lee +Date: Mon, 13 Dec 2021 19:59:29 -0800 +Subject: samples/bpf: Stop using bpf_object__find_program_by_title API. +Patch-mainline: v5.17-rc1 +Git-commit: 7490d59268168adf16aa319b007986778080d367 +References: jsc#PED-1368 + +bpf_object__find_program_by_title is going to be deprecated. +Replace use cases of bpf_object__find_program_by_title in samples/bpf/ +with bpf_object__for_each_program. + +Signed-off-by: Kui-Feng Lee +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211214035931.1148209-3-kuifeng@fb.com +Acked-by: Shung-Hsi Yu +--- + samples/bpf/hbm.c | 11 ++++++++++- + samples/bpf/xdp_fwd_user.c | 12 ++++++++++-- + 2 files changed, 20 insertions(+), 3 deletions(-) + +--- a/samples/bpf/hbm.c ++++ b/samples/bpf/hbm.c +@@ -120,6 +120,9 @@ static void do_error(char *msg, bool err + + static int prog_load(char *prog) + { ++ struct bpf_program *pos; ++ const char *sec_name; ++ + obj = bpf_object__open_file(prog, NULL); + if (libbpf_get_error(obj)) { + printf("ERROR: opening BPF object file failed\n"); +@@ -132,7 +135,13 @@ static int prog_load(char *prog) + goto err; + } + +- bpf_prog = bpf_object__find_program_by_title(obj, "cgroup_skb/egress"); ++ bpf_object__for_each_program(pos, obj) { ++ sec_name = bpf_program__section_name(pos); ++ if (sec_name && !strcmp(sec_name, "cgroup_skb/egress")) { ++ bpf_prog = pos; ++ break; ++ } ++ } + if (!bpf_prog) { + printf("ERROR: finding a prog in obj file failed\n"); + goto err; +--- a/samples/bpf/xdp_fwd_user.c ++++ b/samples/bpf/xdp_fwd_user.c +@@ -79,7 +79,9 @@ int main(int argc, char **argv) + .prog_type = BPF_PROG_TYPE_XDP, + }; + const char *prog_name = "xdp_fwd"; +- struct bpf_program *prog; ++ struct bpf_program *prog = NULL; ++ struct bpf_program *pos; ++ const char *sec_name; + int prog_fd, map_fd = -1; + char filename[PATH_MAX]; + struct bpf_object *obj; +@@ -134,7 +136,13 @@ int main(int argc, char **argv) + return 1; + } + +- prog = bpf_object__find_program_by_title(obj, prog_name); ++ bpf_object__for_each_program(pos, obj) { ++ sec_name = bpf_program__section_name(pos); ++ if (sec_name && !strcmp(sec_name, prog_name)) { ++ prog = pos; ++ break; ++ } ++ } + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + printf("program not found: %s\n", strerror(prog_fd)); diff --git a/patches.suse/samples-bpf-test_overhead_kprobe_kern-replace-bpf_pr.patch 
b/patches.suse/samples-bpf-test_overhead_kprobe_kern-replace-bpf_pr.patch new file mode 100644 index 0000000..392f780 --- /dev/null +++ b/patches.suse/samples-bpf-test_overhead_kprobe_kern-replace-bpf_pr.patch @@ -0,0 +1,109 @@ +From: Yafang Shao +Date: Wed, 19 Jan 2022 18:08:33 -0800 +Subject: samples/bpf/test_overhead_kprobe_kern: replace bpf_probe_read_kernel + with bpf_probe_read_kernel_str to get task comm +Patch-mainline: v5.17-rc1 +Git-commit: d068144d3b2cae09062ed936a3865c093ff69590 +References: jsc#PED-1368 + +bpf_probe_read_kernel_str() will add a nul terminator to the dst, then +we don't care about if the dst size is big enough. This patch also +replaces the hard-coded 16 with TASK_COMM_LEN to make it grepable. + +Link: https://lkml.kernel.org/r/20211120112738.45980-6-laoar.shao@gmail.com +Signed-off-by: Yafang Shao +Reviewed-by: Kees Cook +Acked-by: Andrii Nakryiko +Reviewed-by: David Hildenbrand +Cc: Mathieu Desnoyers +Cc: Arnaldo Carvalho de Melo +Cc: Alexei Starovoitov +Cc: Andrii Nakryiko +Cc: Michal Miroslaw +Cc: Peter Zijlstra +Cc: Steven Rostedt +Cc: Matthew Wilcox +Cc: David Hildenbrand +Cc: Al Viro +Cc: Kees Cook +Cc: Petr Mladek +Cc: Dennis Dalessandro +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Acked-by: Shung-Hsi Yu +--- + samples/bpf/offwaketime_kern.c | 4 ++-- + samples/bpf/test_overhead_kprobe_kern.c | 11 ++++++----- + samples/bpf/test_overhead_tp_kern.c | 5 +++-- + 3 files changed, 11 insertions(+), 9 deletions(-) + +--- a/samples/bpf/offwaketime_kern.c ++++ b/samples/bpf/offwaketime_kern.c +@@ -112,11 +112,11 @@ static inline int update_counts(void *ct + /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ + struct sched_switch_args { + unsigned long long pad; +- char prev_comm[16]; ++ char prev_comm[TASK_COMM_LEN]; + int prev_pid; + int prev_prio; + long long prev_state; +- char next_comm[16]; ++ char next_comm[TASK_COMM_LEN]; + int next_pid; + int next_prio; + }; +--- a/samples/bpf/test_overhead_kprobe_kern.c ++++ b/samples/bpf/test_overhead_kprobe_kern.c +@@ -6,6 +6,7 @@ + */ + #include + #include ++#include + #include + #include + #include +@@ -22,17 +23,17 @@ int prog(struct pt_regs *ctx) + { + struct signal_struct *signal; + struct task_struct *tsk; +- char oldcomm[16] = {}; +- char newcomm[16] = {}; ++ char oldcomm[TASK_COMM_LEN] = {}; ++ char newcomm[TASK_COMM_LEN] = {}; + u16 oom_score_adj; + u32 pid; + + tsk = (void *)PT_REGS_PARM1(ctx); + + pid = _(tsk->pid); +- bpf_probe_read_kernel(oldcomm, sizeof(oldcomm), &tsk->comm); +- bpf_probe_read_kernel(newcomm, sizeof(newcomm), +- (void *)PT_REGS_PARM2(ctx)); ++ bpf_probe_read_kernel_str(oldcomm, sizeof(oldcomm), &tsk->comm); ++ bpf_probe_read_kernel_str(newcomm, sizeof(newcomm), ++ (void *)PT_REGS_PARM2(ctx)); + signal = _(tsk->signal); + oom_score_adj = _(signal->oom_score_adj); + return 0; +--- a/samples/bpf/test_overhead_tp_kern.c ++++ b/samples/bpf/test_overhead_tp_kern.c +@@ -4,6 +4,7 @@ + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ */ ++#include + #include + #include + +@@ -11,8 +12,8 @@ + struct task_rename { + __u64 pad; + __u32 pid; +- char oldcomm[16]; +- char newcomm[16]; ++ char oldcomm[TASK_COMM_LEN]; ++ char newcomm[TASK_COMM_LEN]; + __u16 oom_score_adj; + }; + SEC("tracepoint/task/task_rename") diff --git a/patches.suse/samples-bpf-xdpsock-Add-Dest-and-Src-MAC-setting-for.patch b/patches.suse/samples-bpf-xdpsock-Add-Dest-and-Src-MAC-setting-for.patch new file mode 100644 index 0000000..b2900dd --- /dev/null +++ b/patches.suse/samples-bpf-xdpsock-Add-Dest-and-Src-MAC-setting-for.patch @@ -0,0 +1,123 @@ +From: Ong Boon Leong +Date: Thu, 30 Dec 2021 11:54:42 +0800 +Subject: samples/bpf: xdpsock: Add Dest and Src MAC setting for Tx-only + operation +Patch-mainline: v5.17-rc1 +Git-commit: 6440a6c23f6c72c57dbdf7928d92d3fc1aef6edc +References: jsc#PED-1368 + +To set Dest MAC address (-G|--tx-dmac) only: + $ xdpsock -i eth0 -t -N -z -G aa:bb:cc:dd:ee:ff + +To set Source MAC address (-H|--tx-smac) only: + $ xdpsock -i eth0 -t -N -z -H 11:22:33:44:55:66 + +To set both Dest and Source MAC address: + $ xdpsock -i eth0 -t -N -z -G aa:bb:cc:dd:ee:ff \ + -H 11:22:33:44:55:66 + +The default Dest and Source MAC address remain the same as before. + +Signed-off-by: Ong Boon Leong +Signed-off-by: Alexei Starovoitov +Acked-by: Song Liu +Acked-by: Jesper Dangaard Brouer +Link: https://lore.kernel.org/bpf/20211230035447.523177-3-boon.leong.ong@intel.com +Acked-by: Shung-Hsi Yu +--- + samples/bpf/xdpsock_user.c | 35 ++++++++++++++++++++++++++++++----- + 1 file changed, 30 insertions(+), 5 deletions(-) + +--- a/samples/bpf/xdpsock_user.c ++++ b/samples/bpf/xdpsock_user.c +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -90,6 +91,10 @@ static u32 opt_pkt_fill_pattern = 0x1234 + static bool opt_vlan_tag; + static u16 opt_pkt_vlan_id = VLAN_VID__DEFAULT; + static u16 opt_pkt_vlan_pri = VLAN_PRI__DEFAULT; ++static struct ether_addr opt_txdmac = {{ 0x3c, 0xfd, 0xfe, ++ 0x9e, 0x7f, 0x71 }}; ++static struct ether_addr opt_txsmac = {{ 0xec, 0xb1, 0xd7, ++ 0x98, 0x3a, 0xc0 }}; + static bool opt_extra_stats; + static bool opt_quiet; + static bool opt_app_stats; +@@ -785,8 +790,8 @@ static void gen_eth_hdr_data(void) + sizeof(struct vlan_ethhdr)); + + /* ethernet & VLAN header */ +- memcpy(veth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN); +- memcpy(veth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN); ++ memcpy(veth_hdr->h_dest, &opt_txdmac, ETH_ALEN); ++ memcpy(veth_hdr->h_source, &opt_txsmac, ETH_ALEN); + veth_hdr->h_vlan_proto = htons(ETH_P_8021Q); + vlan_tci = opt_pkt_vlan_id & VLAN_VID_MASK; + vlan_tci |= (opt_pkt_vlan_pri << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK; +@@ -802,8 +807,8 @@ static void gen_eth_hdr_data(void) + sizeof(struct ethhdr)); + + /* ethernet header */ +- memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN); +- memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN); ++ memcpy(eth_hdr->h_dest, &opt_txdmac, ETH_ALEN); ++ memcpy(eth_hdr->h_source, &opt_txsmac, ETH_ALEN); + eth_hdr->h_proto = htons(ETH_P_IP); + } + +@@ -967,6 +972,8 @@ static struct option long_options[] = { + {"tx-vlan", no_argument, 0, 'V'}, + {"tx-vlan-id", required_argument, 0, 'J'}, + {"tx-vlan-pri", required_argument, 0, 'K'}, ++ {"tx-dmac", required_argument, 0, 'G'}, ++ {"tx-smac", required_argument, 0, 'H'}, + {"extra-stats", no_argument, 0, 'x'}, + {"quiet", no_argument, 0, 'Q'}, + {"app-stats", no_argument, 0, 'a'}, +@@ -1010,6 +1017,8 @@ static void usage(const char 
*prog)
+ " -V, --tx-vlan Send VLAN tagged packets (For -t|--txonly)\n"
+ " -J, --tx-vlan-id=n Tx VLAN ID [1-4095]. Default: %d (For -V|--tx-vlan)\n"
+ " -K, --tx-vlan-pri=n Tx VLAN Priority [0-7]. Default: %d (For -V|--tx-vlan)\n"
++ " -G, --tx-dmac= Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
++ " -H, --tx-smac= Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
+ " -x, --extra-stats Display extra statistics.\n"
+ " -Q, --quiet Do not display any stats.\n"
+ " -a, --app-stats Display application (syscall) statistics.\n"
+@@ -1032,7 +1041,7 @@ static void parse_command_line(int argc,
+ opterr = 0;
+
+ for (;;) {
+- c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:VJ:K:xQaI:BR",
++ c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:VJ:K:G:H:xQaI:BR",
+ long_options, &option_index);
+ if (c == -1)
+ break;
+@@ -1122,6 +1131,22 @@ static void parse_command_line(int argc,
+ case 'K':
+ opt_pkt_vlan_pri = atoi(optarg);
+ break;
++ case 'G':
++ if (!ether_aton_r(optarg,
++ (struct ether_addr *)&opt_txdmac)) {
++ fprintf(stderr, "Invalid dmac address:%s\n",
++ optarg);
++ usage(basename(argv[0]));
++ }
++ break;
++ case 'H':
++ if (!ether_aton_r(optarg,
++ (struct ether_addr *)&opt_txsmac)) {
++ fprintf(stderr, "Invalid smac address:%s\n",
++ optarg);
++ usage(basename(argv[0]));
++ }
++ break;
+ case 'x':
+ opt_extra_stats = 1;
+ break;
diff --git a/patches.suse/samples-bpf-xdpsock-Add-VLAN-support-for-Tx-only-ope.patch b/patches.suse/samples-bpf-xdpsock-Add-VLAN-support-for-Tx-only-ope.patch
new file mode 100644
index 0000000..9fdb0cc
--- /dev/null
+++ b/patches.suse/samples-bpf-xdpsock-Add-VLAN-support-for-Tx-only-ope.patch
@@ -0,0 +1,195 @@
+From: Ong Boon Leong
+Date: Thu, 30 Dec 2021 11:54:41 +0800
+Subject: samples/bpf: xdpsock: Add VLAN support for Tx-only operation
+Patch-mainline: v5.17-rc1
+Git-commit: 2741a0493c04067d7acb0e44035aa27618b7d204
+References: jsc#PED-1368
+
+In multi-queue environment testing, support for VLAN-tag based
+steering is useful. So, this patch adds the capability to add a
+VLAN tag (VLAN ID and Priority) to the generated Tx frame.
+
+To set the VLAN ID=10 and Priority=2 for Tx only through TxQ=3:
+ $ xdpsock -i eth0 -t -N -z -q 3 -V -J 10 -K 2
+
+If VLAN ID (-J) and Priority (-K) are not set, they default to
+ VLAN ID = 1
+ VLAN Priority = 0.
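+
+The 16-bit TCI packs the priority into the top three bits and the VLAN
+ID into the low twelve (bit 12 is the unused CFI/DEI bit), so the
+ID=10, Priority=2 example above yields:
+
+  vlan_tci = (2 << 13) | 10 = 0x4000 | 0x000a = 0x400a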
+ +For example, VLAN-tagged Tx only, xdp copy mode through TxQ=1: + $ xdpsock -i eth0 -t -N -c -q 1 -V + +Signed-off-by: Ong Boon Leong +Signed-off-by: Alexei Starovoitov +Acked-by: Song Liu +Link: https://lore.kernel.org/bpf/20211230035447.523177-2-boon.leong.ong@intel.com +Acked-by: Shung-Hsi Yu +--- + samples/bpf/xdpsock_user.c | 90 +++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 75 insertions(+), 15 deletions(-) + +--- a/samples/bpf/xdpsock_user.c ++++ b/samples/bpf/xdpsock_user.c +@@ -56,6 +56,12 @@ + + #define DEBUG_HEXDUMP 0 + ++#define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ ++#define VLAN_PRIO_SHIFT 13 ++#define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ ++#define VLAN_VID__DEFAULT 1 ++#define VLAN_PRI__DEFAULT 0 ++ + typedef __u64 u64; + typedef __u32 u32; + typedef __u16 u16; +@@ -81,6 +87,9 @@ static u32 opt_batch_size = 64; + static int opt_pkt_count; + static u16 opt_pkt_size = MIN_PKT_SIZE; + static u32 opt_pkt_fill_pattern = 0x12345678; ++static bool opt_vlan_tag; ++static u16 opt_pkt_vlan_id = VLAN_VID__DEFAULT; ++static u16 opt_pkt_vlan_pri = VLAN_PRI__DEFAULT; + static bool opt_extra_stats; + static bool opt_quiet; + static bool opt_app_stats; +@@ -101,6 +110,14 @@ static u32 prog_id; + static bool opt_busy_poll; + static bool opt_reduced_cap; + ++struct vlan_ethhdr { ++ unsigned char h_dest[6]; ++ unsigned char h_source[6]; ++ __be16 h_vlan_proto; ++ __be16 h_vlan_TCI; ++ __be16 h_vlan_encapsulated_proto; ++}; ++ + struct xsk_ring_stats { + unsigned long rx_npkts; + unsigned long tx_npkts; +@@ -740,11 +757,13 @@ static inline u16 udp_csum(u32 saddr, u3 + + #define ETH_FCS_SIZE 4 + +-#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ ++#define ETH_HDR_SIZE (opt_vlan_tag ? sizeof(struct vlan_ethhdr) : \ ++ sizeof(struct ethhdr)) ++#define PKT_HDR_SIZE (ETH_HDR_SIZE + sizeof(struct iphdr) + \ + sizeof(struct udphdr)) + + #define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE) +-#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) ++#define IP_PKT_SIZE (PKT_SIZE - ETH_HDR_SIZE) + #define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) + #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) + +@@ -752,17 +771,42 @@ static u8 pkt_data[XSK_UMEM__DEFAULT_FRA + + static void gen_eth_hdr_data(void) + { +- struct udphdr *udp_hdr = (struct udphdr *)(pkt_data + +- sizeof(struct ethhdr) + +- sizeof(struct iphdr)); +- struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + +- sizeof(struct ethhdr)); +- struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; +- +- /* ethernet header */ +- memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN); +- memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN); +- eth_hdr->h_proto = htons(ETH_P_IP); ++ struct udphdr *udp_hdr; ++ struct iphdr *ip_hdr; ++ ++ if (opt_vlan_tag) { ++ struct vlan_ethhdr *veth_hdr = (struct vlan_ethhdr *)pkt_data; ++ u16 vlan_tci = 0; ++ ++ udp_hdr = (struct udphdr *)(pkt_data + ++ sizeof(struct vlan_ethhdr) + ++ sizeof(struct iphdr)); ++ ip_hdr = (struct iphdr *)(pkt_data + ++ sizeof(struct vlan_ethhdr)); ++ ++ /* ethernet & VLAN header */ ++ memcpy(veth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN); ++ memcpy(veth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN); ++ veth_hdr->h_vlan_proto = htons(ETH_P_8021Q); ++ vlan_tci = opt_pkt_vlan_id & VLAN_VID_MASK; ++ vlan_tci |= (opt_pkt_vlan_pri << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK; ++ veth_hdr->h_vlan_TCI = htons(vlan_tci); ++ veth_hdr->h_vlan_encapsulated_proto = htons(ETH_P_IP); ++ } else { ++ struct 
ethhdr *eth_hdr = (struct ethhdr *)pkt_data; ++ ++ udp_hdr = (struct udphdr *)(pkt_data + ++ sizeof(struct ethhdr) + ++ sizeof(struct iphdr)); ++ ip_hdr = (struct iphdr *)(pkt_data + ++ sizeof(struct ethhdr)); ++ ++ /* ethernet header */ ++ memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN); ++ memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN); ++ eth_hdr->h_proto = htons(ETH_P_IP); ++ } ++ + + /* IP header */ + ip_hdr->version = IPVERSION; +@@ -920,6 +964,9 @@ static struct option long_options[] = { + {"tx-pkt-count", required_argument, 0, 'C'}, + {"tx-pkt-size", required_argument, 0, 's'}, + {"tx-pkt-pattern", required_argument, 0, 'P'}, ++ {"tx-vlan", no_argument, 0, 'V'}, ++ {"tx-vlan-id", required_argument, 0, 'J'}, ++ {"tx-vlan-pri", required_argument, 0, 'K'}, + {"extra-stats", no_argument, 0, 'x'}, + {"quiet", no_argument, 0, 'Q'}, + {"app-stats", no_argument, 0, 'a'}, +@@ -960,6 +1007,9 @@ static void usage(const char *prog) + " (Default: %d bytes)\n" + " Min size: %d, Max size %d.\n" + " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n" ++ " -V, --tx-vlan Send VLAN tagged packets (For -t|--txonly)\n" ++ " -J, --tx-vlan-id=n Tx VLAN ID [1-4095]. Default: %d (For -V|--tx-vlan)\n" ++ " -K, --tx-vlan-pri=n Tx VLAN Priority [0-7]. Default: %d (For -V|--tx-vlan)\n" + " -x, --extra-stats Display extra statistics.\n" + " -Q, --quiet Do not display any stats.\n" + " -a, --app-stats Display application (syscall) statistics.\n" +@@ -969,7 +1019,8 @@ static void usage(const char *prog) + "\n"; + fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, + opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, +- XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern); ++ XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern, ++ VLAN_VID__DEFAULT, VLAN_PRI__DEFAULT); + + exit(EXIT_FAILURE); + } +@@ -981,7 +1032,7 @@ static void parse_command_line(int argc, + opterr = 0; + + for (;;) { +- c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:BR", ++ c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:VJ:K:xQaI:BR", + long_options, &option_index); + if (c == -1) + break; +@@ -1062,6 +1113,15 @@ static void parse_command_line(int argc, + case 'P': + opt_pkt_fill_pattern = strtol(optarg, NULL, 16); + break; ++ case 'V': ++ opt_vlan_tag = true; ++ break; ++ case 'J': ++ opt_pkt_vlan_id = atoi(optarg); ++ break; ++ case 'K': ++ opt_pkt_vlan_pri = atoi(optarg); ++ break; + case 'x': + opt_extra_stats = 1; + break; diff --git a/patches.suse/samples-bpf-xdpsock-Add-clockid-selection-support.patch b/patches.suse/samples-bpf-xdpsock-Add-clockid-selection-support.patch new file mode 100644 index 0000000..3990140 --- /dev/null +++ b/patches.suse/samples-bpf-xdpsock-Add-clockid-selection-support.patch @@ -0,0 +1,115 @@ +From: Ong Boon Leong +Date: Thu, 30 Dec 2021 11:54:43 +0800 +Subject: samples/bpf: xdpsock: Add clockid selection support +Patch-mainline: v5.17-rc1 +Git-commit: 5a3882542acda1ac5f0a22dddf7f7f8533d3a8cc +References: jsc#PED-1368 + +User specifies the clock selection by using -w CLOCK or --clock=CLOCK +where CLOCK=[REALTIME, TAI, BOOTTIME, MONOTONIC]. + +The default CLOCK selection is MONOTONIC. 
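+
+For example, Tx-only in zero-copy mode against the TAI clock (clock
+names are matched case-insensitively):
+ $ xdpsock -i eth0 -t -N -z -w TAI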
+ +The implementation of clock selection parsing is borrowed from +iproute2/tc/q_taprio.c + +Signed-off-by: Ong Boon Leong +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211230035447.523177-4-boon.leong.ong@intel.com +Acked-by: Shung-Hsi Yu +--- + samples/bpf/xdpsock_user.c | 40 ++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 38 insertions(+), 2 deletions(-) + +--- a/samples/bpf/xdpsock_user.c ++++ b/samples/bpf/xdpsock_user.c +@@ -114,6 +114,7 @@ static u32 opt_num_xsks = 1; + static u32 prog_id; + static bool opt_busy_poll; + static bool opt_reduced_cap; ++static clockid_t opt_clock = CLOCK_MONOTONIC; + + struct vlan_ethhdr { + unsigned char h_dest[6]; +@@ -178,15 +179,40 @@ struct xsk_socket_info { + u32 outstanding_tx; + }; + ++static const struct clockid_map { ++ const char *name; ++ clockid_t clockid; ++} clockids_map[] = { ++ { "REALTIME", CLOCK_REALTIME }, ++ { "TAI", CLOCK_TAI }, ++ { "BOOTTIME", CLOCK_BOOTTIME }, ++ { "MONOTONIC", CLOCK_MONOTONIC }, ++ { NULL } ++}; ++ + static int num_socks; + struct xsk_socket_info *xsks[MAX_SOCKS]; + int sock; + ++static int get_clockid(clockid_t *id, const char *name) ++{ ++ const struct clockid_map *clk; ++ ++ for (clk = clockids_map; clk->name; clk++) { ++ if (strcasecmp(clk->name, name) == 0) { ++ *id = clk->clockid; ++ return 0; ++ } ++ } ++ ++ return -1; ++} ++ + static unsigned long get_nsecs(void) + { + struct timespec ts; + +- clock_gettime(CLOCK_MONOTONIC, &ts); ++ clock_gettime(opt_clock, &ts); + return ts.tv_sec * 1000000000UL + ts.tv_nsec; + } + +@@ -965,6 +991,7 @@ static struct option long_options[] = { + {"shared-umem", no_argument, 0, 'M'}, + {"force", no_argument, 0, 'F'}, + {"duration", required_argument, 0, 'd'}, ++ {"clock", required_argument, 0, 'w'}, + {"batch-size", required_argument, 0, 'b'}, + {"tx-pkt-count", required_argument, 0, 'C'}, + {"tx-pkt-size", required_argument, 0, 's'}, +@@ -1006,6 +1033,7 @@ static void usage(const char *prog) + " -F, --force Force loading the XDP prog\n" + " -d, --duration=n Duration in secs to run command.\n" + " Default: forever.\n" ++ " -w, --clock=CLOCK Clock NAME (default MONOTONIC).\n" + " -b, --batch-size=n Batch size for sending or receiving\n" + " packets. Default: %d\n" + " -C, --tx-pkt-count=n Number of packets to send.\n" +@@ -1041,7 +1069,7 @@ static void parse_command_line(int argc, + opterr = 0; + + for (;;) { +- c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:VJ:K:G:H:xQaI:BR", ++ c = getopt_long(argc, argv, "Frtli:q:pSNn:w:czf:muMd:b:C:s:P:VJ:K:G:H:xQaI:BR", + long_options, &option_index); + if (c == -1) + break; +@@ -1075,6 +1103,14 @@ static void parse_command_line(int argc, + case 'n': + opt_interval = atoi(optarg); + break; ++ case 'w': ++ if (get_clockid(&opt_clock, optarg)) { ++ fprintf(stderr, ++ "ERROR: Invalid clock %s. 
Default to CLOCK_MONOTONIC.\n", ++ optarg); ++ opt_clock = CLOCK_MONOTONIC; ++ } ++ break; + case 'z': + opt_xdp_bind_flags |= XDP_ZEROCOPY; + break; diff --git a/patches.suse/samples-bpf-xdpsock-Add-cyclic-TX-operation-capabili.patch b/patches.suse/samples-bpf-xdpsock-Add-cyclic-TX-operation-capabili.patch new file mode 100644 index 0000000..1b8f591 --- /dev/null +++ b/patches.suse/samples-bpf-xdpsock-Add-cyclic-TX-operation-capabili.patch @@ -0,0 +1,234 @@ +From: Ong Boon Leong +Date: Thu, 30 Dec 2021 11:54:44 +0800 +Subject: samples/bpf: xdpsock: Add cyclic TX operation capability +Patch-mainline: v5.17-rc1 +Git-commit: fa0d27a1d5a8c1f07b0229348b0d178233694fbc +References: jsc#PED-1368 + +Tx cycle time is in micro-seconds unit. By combining the batch size (-b M) +and Tx cycle time (-T|--tx-cycle N), xdpsock now can transmit batch-size of +packets every N-us periodically. Cyclic TX operation is not applicable if +--poll mode is used. + +To transmit 16 packets every 1ms cycle time for total of 100000 packets +silently: + $ xdpsock -i eth0 -T -N -z -T 1000 -b 16 -C 100000 + +To print cyclic TX schedule variance stats, use --app-stats|-a: + $ xdpsock -i eth0 -T -N -z -T 1000 -b 16 -C 100000 -a + + sock0@eth0:0 txonly xdp-drv + pps pkts 0.00 +rx 0 0 +tx 0 100000 + + calls/s count +rx empty polls 0 0 +fill fail polls 0 0 +copy tx sendtos 0 0 +tx wakeup sendtos 0 6254 +opt polls 0 0 + + period min ave max cycle +Cyclic TX 1000000 53507 75334 712642 6250 + +Signed-off-by: Ong Boon Leong +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211230035447.523177-5-boon.leong.ong@intel.com +Acked-by: Shung-Hsi Yu +--- + samples/bpf/xdpsock_user.c | 85 ++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 80 insertions(+), 5 deletions(-) + +--- a/samples/bpf/xdpsock_user.c ++++ b/samples/bpf/xdpsock_user.c +@@ -63,12 +63,19 @@ + #define VLAN_VID__DEFAULT 1 + #define VLAN_PRI__DEFAULT 0 + ++#define NSEC_PER_SEC 1000000000UL ++#define NSEC_PER_USEC 1000 ++ + typedef __u64 u64; + typedef __u32 u32; + typedef __u16 u16; + typedef __u8 u8; + + static unsigned long prev_time; ++static long tx_cycle_diff_min; ++static long tx_cycle_diff_max; ++static double tx_cycle_diff_ave; ++static long tx_cycle_cnt; + + enum benchmark_type { + BENCH_RXDROP = 0, +@@ -115,6 +122,7 @@ static u32 prog_id; + static bool opt_busy_poll; + static bool opt_reduced_cap; + static clockid_t opt_clock = CLOCK_MONOTONIC; ++static unsigned long opt_tx_cycle_ns; + + struct vlan_ethhdr { + unsigned char h_dest[6]; +@@ -305,6 +313,15 @@ static void dump_app_stats(long dt) + xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos; + xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls; + } ++ ++ if (opt_tx_cycle_ns) { ++ printf("\n%-18s %-10s %-10s %-10s %-10s %-10s\n", ++ "", "period", "min", "ave", "max", "cycle"); ++ printf("%-18s %-10lu %-10lu %-10lu %-10lu %-10lu\n", ++ "Cyclic TX", opt_tx_cycle_ns, tx_cycle_diff_min, ++ (long)(tx_cycle_diff_ave / tx_cycle_cnt), ++ tx_cycle_diff_max, tx_cycle_cnt); ++ } + } + + static bool get_interrupt_number(void) +@@ -1001,6 +1018,7 @@ static struct option long_options[] = { + {"tx-vlan-pri", required_argument, 0, 'K'}, + {"tx-dmac", required_argument, 0, 'G'}, + {"tx-smac", required_argument, 0, 'H'}, ++ {"tx-cycle", required_argument, 0, 'T'}, + {"extra-stats", no_argument, 0, 'x'}, + {"quiet", no_argument, 0, 'Q'}, + {"app-stats", no_argument, 0, 'a'}, +@@ -1047,6 +1065,7 @@ static void usage(const char *prog) + " -K, 
--tx-vlan-pri=n Tx VLAN Priority [0-7]. Default: %d (For -V|--tx-vlan)\n" + " -G, --tx-dmac= Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n" + " -H, --tx-smac= Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n" ++ " -T, --tx-cycle=n Tx cycle time in micro-seconds (For -t|--txonly).\n" + " -x, --extra-stats Display extra statistics.\n" + " -Q, --quiet Do not display any stats.\n" + " -a, --app-stats Display application (syscall) statistics.\n" +@@ -1069,7 +1088,7 @@ static void parse_command_line(int argc, + opterr = 0; + + for (;;) { +- c = getopt_long(argc, argv, "Frtli:q:pSNn:w:czf:muMd:b:C:s:P:VJ:K:G:H:xQaI:BR", ++ c = getopt_long(argc, argv, "Frtli:q:pSNn:w:czf:muMd:b:C:s:P:VJ:K:G:H:T:xQaI:BR", + long_options, &option_index); + if (c == -1) + break; +@@ -1183,6 +1202,10 @@ static void parse_command_line(int argc, + usage(basename(argv[0])); + } + break; ++ case 'T': ++ opt_tx_cycle_ns = atoi(optarg); ++ opt_tx_cycle_ns *= NSEC_PER_USEC; ++ break; + case 'x': + opt_extra_stats = 1; + break; +@@ -1388,7 +1411,7 @@ static void rx_drop_all(void) + } + } + +-static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) ++static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) + { + u32 idx; + unsigned int i; +@@ -1397,7 +1420,7 @@ static void tx_only(struct xsk_socket_in + batch_size) { + complete_tx_only(xsk, batch_size); + if (benchmark_done) +- return; ++ return 0; + } + + for (i = 0; i < batch_size; i++) { +@@ -1413,6 +1436,8 @@ static void tx_only(struct xsk_socket_in + *frame_nb += batch_size; + *frame_nb %= NUM_FRAMES; + complete_tx_only(xsk, batch_size); ++ ++ return batch_size; + } + + static inline int get_batch_size(int pkt_cnt) +@@ -1446,16 +1471,39 @@ static void tx_only_all(void) + { + struct pollfd fds[MAX_SOCKS] = {}; + u32 frame_nb[MAX_SOCKS] = {}; ++ unsigned long next_tx_ns = 0; + int pkt_cnt = 0; + int i, ret; + ++ if (opt_poll && opt_tx_cycle_ns) { ++ fprintf(stderr, ++ "Error: --poll and --tx-cycles are both set\n"); ++ return; ++ } ++ + for (i = 0; i < num_socks; i++) { + fds[0].fd = xsk_socket__fd(xsks[i]->xsk); + fds[0].events = POLLOUT; + } + ++ if (opt_tx_cycle_ns) { ++ /* Align Tx time to micro-second boundary */ ++ next_tx_ns = (get_nsecs() / NSEC_PER_USEC + 1) * ++ NSEC_PER_USEC; ++ next_tx_ns += opt_tx_cycle_ns; ++ ++ /* Initialize periodic Tx scheduling variance */ ++ tx_cycle_diff_min = 1000000000; ++ tx_cycle_diff_max = 0; ++ tx_cycle_diff_ave = 0.0; ++ } ++ + while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { + int batch_size = get_batch_size(pkt_cnt); ++ struct timespec next; ++ int tx_cnt = 0; ++ long diff; ++ int err; + + if (opt_poll) { + for (i = 0; i < num_socks; i++) +@@ -1468,13 +1516,40 @@ static void tx_only_all(void) + continue; + } + ++ if (opt_tx_cycle_ns) { ++ next.tv_sec = next_tx_ns / NSEC_PER_SEC; ++ next.tv_nsec = next_tx_ns % NSEC_PER_SEC; ++ err = clock_nanosleep(opt_clock, TIMER_ABSTIME, &next, NULL); ++ if (err) { ++ if (err != EINTR) ++ fprintf(stderr, ++ "clock_nanosleep failed. 
Err:%d errno:%d\n", ++ err, errno); ++ break; ++ } ++ ++ /* Measure periodic Tx scheduling variance */ ++ diff = get_nsecs() - next_tx_ns; ++ if (diff < tx_cycle_diff_min) ++ tx_cycle_diff_min = diff; ++ ++ if (diff > tx_cycle_diff_max) ++ tx_cycle_diff_max = diff; ++ ++ tx_cycle_diff_ave += (double)diff; ++ tx_cycle_cnt++; ++ } ++ + for (i = 0; i < num_socks; i++) +- tx_only(xsks[i], &frame_nb[i], batch_size); ++ tx_cnt += tx_only(xsks[i], &frame_nb[i], batch_size); + +- pkt_cnt += batch_size; ++ pkt_cnt += tx_cnt; + + if (benchmark_done) + break; ++ ++ if (opt_tx_cycle_ns) ++ next_tx_ns += opt_tx_cycle_ns; + } + + if (opt_pkt_count) diff --git a/patches.suse/samples-bpf-xdpsock-Add-sched-policy-and-priority-su.patch b/patches.suse/samples-bpf-xdpsock-Add-sched-policy-and-priority-su.patch new file mode 100644 index 0000000..3a18fed --- /dev/null +++ b/patches.suse/samples-bpf-xdpsock-Add-sched-policy-and-priority-su.patch @@ -0,0 +1,185 @@ +From: Ong Boon Leong +Date: Thu, 30 Dec 2021 11:54:45 +0800 +Subject: samples/bpf: xdpsock: Add sched policy and priority support +Patch-mainline: v5.17-rc1 +Git-commit: fa24d0b1d57825d1a5b802339728d4d8ac20b6d6 +References: jsc#PED-1368 + +By default, TX schedule policy is SCHED_OTHER (round-robin time-sharing). +To improve TX cyclic scheduling, we add SCHED_FIFO policy and its priority +by using -W FIFO or --policy=FIFO and -U or --schpri=. + +A) From xdpsock --app-stats, for SCHED_OTHER policy: + $ xdpsock -i eth0 -t -N -z -T 1000 -b 16 -C 100000 -a + + period min ave max cycle + Cyclic TX 1000000 53507 75334 712642 6250 + +B) For SCHED_FIFO policy and schpri=50: + $ xdpsock -i eth0 -t -N -z -T 1000 -b 16 -C 100000 -a -W FIFO -U 50 + + period min ave max cycle + Cyclic TX 1000000 3699 24859 54397 6250 + +Signed-off-by: Ong Boon Leong +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211230035447.523177-6-boon.leong.ong@intel.com +Acked-by: Shung-Hsi Yu +--- + samples/bpf/xdpsock_user.c | 61 +++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 59 insertions(+), 2 deletions(-) + +--- a/samples/bpf/xdpsock_user.c ++++ b/samples/bpf/xdpsock_user.c +@@ -31,6 +31,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -66,6 +67,8 @@ + #define NSEC_PER_SEC 1000000000UL + #define NSEC_PER_USEC 1000 + ++#define SCHED_PRI__DEFAULT 0 ++ + typedef __u64 u64; + typedef __u32 u32; + typedef __u16 u16; +@@ -123,6 +126,8 @@ static bool opt_busy_poll; + static bool opt_reduced_cap; + static clockid_t opt_clock = CLOCK_MONOTONIC; + static unsigned long opt_tx_cycle_ns; ++static int opt_schpolicy = SCHED_OTHER; ++static int opt_schprio = SCHED_PRI__DEFAULT; + + struct vlan_ethhdr { + unsigned char h_dest[6]; +@@ -198,6 +203,15 @@ static const struct clockid_map { + { NULL } + }; + ++static const struct sched_map { ++ const char *name; ++ int policy; ++} schmap[] = { ++ { "OTHER", SCHED_OTHER }, ++ { "FIFO", SCHED_FIFO }, ++ { NULL } ++}; ++ + static int num_socks; + struct xsk_socket_info *xsks[MAX_SOCKS]; + int sock; +@@ -216,6 +230,20 @@ static int get_clockid(clockid_t *id, co + return -1; + } + ++static int get_schpolicy(int *policy, const char *name) ++{ ++ const struct sched_map *sch; ++ ++ for (sch = schmap; sch->name; sch++) { ++ if (strcasecmp(sch->name, name) == 0) { ++ *policy = sch->policy; ++ return 0; ++ } ++ } ++ ++ return -1; ++} ++ + static unsigned long get_nsecs(void) + { + struct timespec ts; +@@ -1019,6 +1047,8 @@ static struct option long_options[] = { + {"tx-dmac", required_argument, 0, 'G'}, 
+ {"tx-smac", required_argument, 0, 'H'}, + {"tx-cycle", required_argument, 0, 'T'}, ++ {"policy", required_argument, 0, 'W'}, ++ {"schpri", required_argument, 0, 'U'}, + {"extra-stats", no_argument, 0, 'x'}, + {"quiet", no_argument, 0, 'Q'}, + {"app-stats", no_argument, 0, 'a'}, +@@ -1066,6 +1096,8 @@ static void usage(const char *prog) + " -G, --tx-dmac= Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n" + " -H, --tx-smac= Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n" + " -T, --tx-cycle=n Tx cycle time in micro-seconds (For -t|--txonly).\n" ++ " -W, --policy=POLICY Schedule policy. Default: SCHED_OTHER\n" ++ " -U, --schpri=n Schedule priority. Default: %d\n" + " -x, --extra-stats Display extra statistics.\n" + " -Q, --quiet Do not display any stats.\n" + " -a, --app-stats Display application (syscall) statistics.\n" +@@ -1076,7 +1108,8 @@ static void usage(const char *prog) + fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, + opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, + XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern, +- VLAN_VID__DEFAULT, VLAN_PRI__DEFAULT); ++ VLAN_VID__DEFAULT, VLAN_PRI__DEFAULT, ++ SCHED_PRI__DEFAULT); + + exit(EXIT_FAILURE); + } +@@ -1088,7 +1121,8 @@ static void parse_command_line(int argc, + opterr = 0; + + for (;;) { +- c = getopt_long(argc, argv, "Frtli:q:pSNn:w:czf:muMd:b:C:s:P:VJ:K:G:H:T:xQaI:BR", ++ c = getopt_long(argc, argv, ++ "Frtli:q:pSNn:w:czf:muMd:b:C:s:P:VJ:K:G:H:T:W:U:xQaI:BR", + long_options, &option_index); + if (c == -1) + break; +@@ -1206,6 +1240,17 @@ static void parse_command_line(int argc, + opt_tx_cycle_ns = atoi(optarg); + opt_tx_cycle_ns *= NSEC_PER_USEC; + break; ++ case 'W': ++ if (get_schpolicy(&opt_schpolicy, optarg)) { ++ fprintf(stderr, ++ "ERROR: Invalid policy %s. 
Default to SCHED_OTHER.\n", ++ optarg); ++ opt_schpolicy = SCHED_OTHER; ++ } ++ break; ++ case 'U': ++ opt_schprio = atoi(optarg); ++ break; + case 'x': + opt_extra_stats = 1; + break; +@@ -1780,6 +1825,7 @@ int main(int argc, char **argv) + struct __user_cap_data_struct data[2] = { { 0 } }; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + bool rx = false, tx = false; ++ struct sched_param schparam; + struct xsk_umem_info *umem; + struct bpf_object *obj; + int xsks_map_fd = 0; +@@ -1881,6 +1927,16 @@ int main(int argc, char **argv) + prev_time = get_nsecs(); + start_time = prev_time; + ++ /* Configure sched priority for better wake-up accuracy */ ++ memset(&schparam, 0, sizeof(schparam)); ++ schparam.sched_priority = opt_schprio; ++ ret = sched_setscheduler(0, opt_schpolicy, &schparam); ++ if (ret) { ++ fprintf(stderr, "Error(%d) in setting priority(%d): %s\n", ++ errno, opt_schprio, strerror(errno)); ++ goto out; ++ } ++ + if (opt_bench == BENCH_RXDROP) + rx_drop_all(); + else if (opt_bench == BENCH_TXONLY) +@@ -1888,6 +1944,7 @@ int main(int argc, char **argv) + else + l2fwd_all(); + ++out: + benchmark_done = true; + + if (!opt_quiet) diff --git a/patches.suse/samples-bpf-xdpsock-Add-time-out-for-cleaning-Tx.patch b/patches.suse/samples-bpf-xdpsock-Add-time-out-for-cleaning-Tx.patch new file mode 100644 index 0000000..0377892 --- /dev/null +++ b/patches.suse/samples-bpf-xdpsock-Add-time-out-for-cleaning-Tx.patch @@ -0,0 +1,80 @@ +From: Ong Boon Leong +Date: Thu, 30 Dec 2021 11:54:46 +0800 +Subject: samples/bpf: xdpsock: Add time-out for cleaning Tx +Patch-mainline: v5.17-rc1 +Git-commit: 8121e78932018df48758985e00651e16ff34ae5f +References: jsc#PED-1368 + +When user sets tx-pkt-count and in case where there are invalid Tx frame, +the complete_tx_only_all() process polls indefinitely. So, this patch +adds a time-out mechanism into the process so that the application +can terminate automatically after it retries 3*polling interval duration. + +v1->v2: + Thanks to Jesper's and Song Liu's suggestion. 
+ - clean-up git message to remove polling log + - make the Tx time-out retries configurable with 1s granularity + +Signed-off-by: Ong Boon Leong +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211230035447.523177-7-boon.leong.ong@intel.com +Acked-by: Shung-Hsi Yu +--- + samples/bpf/xdpsock_user.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +--- a/samples/bpf/xdpsock_user.c ++++ b/samples/bpf/xdpsock_user.c +@@ -113,6 +113,7 @@ static u32 irq_no; + static int irqs_at_init = -1; + static int opt_poll; + static int opt_interval = 1; ++static int opt_retries = 3; + static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; + static u32 opt_umem_flags; + static int opt_unaligned_chunks; +@@ -1028,6 +1029,7 @@ static struct option long_options[] = { + {"xdp-skb", no_argument, 0, 'S'}, + {"xdp-native", no_argument, 0, 'N'}, + {"interval", required_argument, 0, 'n'}, ++ {"retries", required_argument, 0, 'O'}, + {"zero-copy", no_argument, 0, 'z'}, + {"copy", no_argument, 0, 'c'}, + {"frame-size", required_argument, 0, 'f'}, +@@ -1072,6 +1074,7 @@ static void usage(const char *prog) + " -S, --xdp-skb=n Use XDP skb-mod\n" + " -N, --xdp-native=n Enforce XDP native mode\n" + " -n, --interval=n Specify statistics update interval (default 1 sec).\n" ++ " -O, --retries=n Specify time-out retries (1s interval) attempt (default 3).\n" + " -z, --zero-copy Force zero-copy mode.\n" + " -c, --copy Force copy mode.\n" + " -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n" +@@ -1122,7 +1125,7 @@ static void parse_command_line(int argc, + + for (;;) { + c = getopt_long(argc, argv, +- "Frtli:q:pSNn:w:czf:muMd:b:C:s:P:VJ:K:G:H:T:W:U:xQaI:BR", ++ "Frtli:q:pSNn:w:O:czf:muMd:b:C:s:P:VJ:K:G:H:T:W:U:xQaI:BR", + long_options, &option_index); + if (c == -1) + break; +@@ -1164,6 +1167,9 @@ static void parse_command_line(int argc, + opt_clock = CLOCK_MONOTONIC; + } + break; ++ case 'O': ++ opt_retries = atoi(optarg); ++ break; + case 'z': + opt_xdp_bind_flags |= XDP_ZEROCOPY; + break; +@@ -1509,7 +1515,8 @@ static void complete_tx_only_all(void) + pending = !!xsks[i]->outstanding_tx; + } + } +- } while (pending); ++ sleep(1); ++ } while (pending && opt_retries-- > 0); + } + + static void tx_only_all(void) diff --git a/patches.suse/samples-bpf-xdpsock-Add-timestamp-for-Tx-only-operat.patch b/patches.suse/samples-bpf-xdpsock-Add-timestamp-for-Tx-only-operat.patch new file mode 100644 index 0000000..d705184 --- /dev/null +++ b/patches.suse/samples-bpf-xdpsock-Add-timestamp-for-Tx-only-operat.patch @@ -0,0 +1,248 @@ +From: Ong Boon Leong +Date: Thu, 30 Dec 2021 11:54:47 +0800 +Subject: samples/bpf: xdpsock: Add timestamp for Tx-only operation +Patch-mainline: v5.17-rc1 +Git-commit: eb68db45b747756c351ea84e9af55a69468d0549 +References: jsc#PED-1368 + +It may be useful to add timestamp for Tx packets for continuous or cyclic +transmit operation. The timestamp and sequence ID of a Tx packet are +stored according to pktgen header format. To enable per-packet timestamp, +use -y|--tstamp option. If timestamp is off, pktgen header is not +included in the UDP payload. This means receiving side can use the magic +number for pktgen for differentiation. + +The implementation supports both VLAN tagged and untagged option. By +default, the minimum packet size is set at 64B. However, if VLAN tagged +is on (-V), the minimum packet size is increased to 66B just so to fit +the pktgen_hdr size. + +Added hex_dump() into the code path just for future cross-checking. 
+As before, simply change to "#define DEBUG_HEXDUMP 1" to inspect the +accuracy of TX packet. + +Signed-off-by: Ong Boon Leong +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211230035447.523177-8-boon.leong.ong@intel.com +Acked-by: Shung-Hsi Yu +--- + samples/bpf/xdpsock_user.c | 77 +++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 68 insertions(+), 9 deletions(-) + +--- a/samples/bpf/xdpsock_user.c ++++ b/samples/bpf/xdpsock_user.c +@@ -111,6 +111,7 @@ static bool opt_app_stats; + static const char *opt_irq_str = ""; + static u32 irq_no; + static int irqs_at_init = -1; ++static u32 sequence; + static int opt_poll; + static int opt_interval = 1; + static int opt_retries = 3; +@@ -129,6 +130,7 @@ static clockid_t opt_clock = CLOCK_MONOT + static unsigned long opt_tx_cycle_ns; + static int opt_schpolicy = SCHED_OTHER; + static int opt_schprio = SCHED_PRI__DEFAULT; ++static bool opt_tstamp; + + struct vlan_ethhdr { + unsigned char h_dest[6]; +@@ -138,6 +140,14 @@ struct vlan_ethhdr { + __be16 h_vlan_encapsulated_proto; + }; + ++#define PKTGEN_MAGIC 0xbe9be955 ++struct pktgen_hdr { ++ __be32 pgh_magic; ++ __be32 seq_num; ++ __be32 tv_sec; ++ __be32 tv_usec; ++}; ++ + struct xsk_ring_stats { + unsigned long rx_npkts; + unsigned long tx_npkts; +@@ -836,18 +846,25 @@ static inline u16 udp_csum(u32 saddr, u3 + + #define ETH_HDR_SIZE (opt_vlan_tag ? sizeof(struct vlan_ethhdr) : \ + sizeof(struct ethhdr)) ++#define PKTGEN_HDR_SIZE (opt_tstamp ? sizeof(struct pktgen_hdr) : 0) + #define PKT_HDR_SIZE (ETH_HDR_SIZE + sizeof(struct iphdr) + \ +- sizeof(struct udphdr)) ++ sizeof(struct udphdr) + PKTGEN_HDR_SIZE) ++#define PKTGEN_HDR_OFFSET (ETH_HDR_SIZE + sizeof(struct iphdr) + \ ++ sizeof(struct udphdr)) ++#define PKTGEN_SIZE_MIN (PKTGEN_HDR_OFFSET + sizeof(struct pktgen_hdr) + \ ++ ETH_FCS_SIZE) + + #define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE) + #define IP_PKT_SIZE (PKT_SIZE - ETH_HDR_SIZE) + #define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) +-#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) ++#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - \ ++ (sizeof(struct udphdr) + PKTGEN_HDR_SIZE)) + + static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; + + static void gen_eth_hdr_data(void) + { ++ struct pktgen_hdr *pktgen_hdr; + struct udphdr *udp_hdr; + struct iphdr *ip_hdr; + +@@ -860,7 +877,10 @@ static void gen_eth_hdr_data(void) + sizeof(struct iphdr)); + ip_hdr = (struct iphdr *)(pkt_data + + sizeof(struct vlan_ethhdr)); +- ++ pktgen_hdr = (struct pktgen_hdr *)(pkt_data + ++ sizeof(struct vlan_ethhdr) + ++ sizeof(struct iphdr) + ++ sizeof(struct udphdr)); + /* ethernet & VLAN header */ + memcpy(veth_hdr->h_dest, &opt_txdmac, ETH_ALEN); + memcpy(veth_hdr->h_source, &opt_txsmac, ETH_ALEN); +@@ -877,7 +897,10 @@ static void gen_eth_hdr_data(void) + sizeof(struct iphdr)); + ip_hdr = (struct iphdr *)(pkt_data + + sizeof(struct ethhdr)); +- ++ pktgen_hdr = (struct pktgen_hdr *)(pkt_data + ++ sizeof(struct ethhdr) + ++ sizeof(struct iphdr) + ++ sizeof(struct udphdr)); + /* ethernet header */ + memcpy(eth_hdr->h_dest, &opt_txdmac, ETH_ALEN); + memcpy(eth_hdr->h_source, &opt_txsmac, ETH_ALEN); +@@ -906,6 +929,9 @@ static void gen_eth_hdr_data(void) + udp_hdr->dest = htons(0x1000); + udp_hdr->len = htons(UDP_PKT_SIZE); + ++ if (opt_tstamp) ++ pktgen_hdr->pgh_magic = htonl(PKTGEN_MAGIC); ++ + /* UDP data */ + memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern, + UDP_PKT_DATA_SIZE); +@@ -1049,6 +1075,7 @@ static struct option long_options[] = { + 
{"tx-dmac", required_argument, 0, 'G'}, + {"tx-smac", required_argument, 0, 'H'}, + {"tx-cycle", required_argument, 0, 'T'}, ++ {"tstamp", no_argument, 0, 'y'}, + {"policy", required_argument, 0, 'W'}, + {"schpri", required_argument, 0, 'U'}, + {"extra-stats", no_argument, 0, 'x'}, +@@ -1099,6 +1126,7 @@ static void usage(const char *prog) + " -G, --tx-dmac= Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n" + " -H, --tx-smac= Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n" + " -T, --tx-cycle=n Tx cycle time in micro-seconds (For -t|--txonly).\n" ++ " -y, --tstamp Add time-stamp to packet (For -t|--txonly).\n" + " -W, --policy=POLICY Schedule policy. Default: SCHED_OTHER\n" + " -U, --schpri=n Schedule priority. Default: %d\n" + " -x, --extra-stats Display extra statistics.\n" +@@ -1125,7 +1153,7 @@ static void parse_command_line(int argc, + + for (;;) { + c = getopt_long(argc, argv, +- "Frtli:q:pSNn:w:O:czf:muMd:b:C:s:P:VJ:K:G:H:T:W:U:xQaI:BR", ++ "Frtli:q:pSNn:w:O:czf:muMd:b:C:s:P:VJ:K:G:H:T:yW:U:xQaI:BR", + long_options, &option_index); + if (c == -1) + break; +@@ -1246,6 +1274,9 @@ static void parse_command_line(int argc, + opt_tx_cycle_ns = atoi(optarg); + opt_tx_cycle_ns *= NSEC_PER_USEC; + break; ++ case 'y': ++ opt_tstamp = 1; ++ break; + case 'W': + if (get_schpolicy(&opt_schpolicy, optarg)) { + fprintf(stderr, +@@ -1462,9 +1493,10 @@ static void rx_drop_all(void) + } + } + +-static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) ++static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, ++ int batch_size, unsigned long tx_ns) + { +- u32 idx; ++ u32 idx, tv_sec, tv_usec; + unsigned int i; + + while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < +@@ -1474,11 +1506,31 @@ static int tx_only(struct xsk_socket_inf + return 0; + } + ++ if (opt_tstamp) { ++ tv_sec = (u32)(tx_ns / NSEC_PER_SEC); ++ tv_usec = (u32)((tx_ns % NSEC_PER_SEC) / 1000); ++ } ++ + for (i = 0; i < batch_size; i++) { + struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, + idx + i); + tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size; + tx_desc->len = PKT_SIZE; ++ ++ if (opt_tstamp) { ++ struct pktgen_hdr *pktgen_hdr; ++ u64 addr = tx_desc->addr; ++ char *pkt; ++ ++ pkt = xsk_umem__get_data(xsk->umem->buffer, addr); ++ pktgen_hdr = (struct pktgen_hdr *)(pkt + PKTGEN_HDR_OFFSET); ++ ++ pktgen_hdr->seq_num = htonl(sequence++); ++ pktgen_hdr->tv_sec = htonl(tv_sec); ++ pktgen_hdr->tv_usec = htonl(tv_usec); ++ ++ hex_dump(pkt, PKT_SIZE, addr); ++ } + } + + xsk_ring_prod__submit(&xsk->tx, batch_size); +@@ -1552,6 +1604,7 @@ static void tx_only_all(void) + + while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { + int batch_size = get_batch_size(pkt_cnt); ++ unsigned long tx_ns = 0; + struct timespec next; + int tx_cnt = 0; + long diff; +@@ -1581,7 +1634,8 @@ static void tx_only_all(void) + } + + /* Measure periodic Tx scheduling variance */ +- diff = get_nsecs() - next_tx_ns; ++ tx_ns = get_nsecs(); ++ diff = tx_ns - next_tx_ns; + if (diff < tx_cycle_diff_min) + tx_cycle_diff_min = diff; + +@@ -1590,10 +1644,12 @@ static void tx_only_all(void) + + tx_cycle_diff_ave += (double)diff; + tx_cycle_cnt++; ++ } else if (opt_tstamp) { ++ tx_ns = get_nsecs(); + } + + for (i = 0; i < num_socks; i++) +- tx_cnt += tx_only(xsks[i], &frame_nb[i], batch_size); ++ tx_cnt += tx_only(xsks[i], &frame_nb[i], batch_size, tx_ns); + + pkt_cnt += tx_cnt; + +@@ -1895,6 +1951,9 @@ int main(int argc, char **argv) + 
apply_setsockopt(xsks[i]); + + if (opt_bench == BENCH_TXONLY) { ++ if (opt_tstamp && opt_pkt_size < PKTGEN_SIZE_MIN) ++ opt_pkt_size = PKTGEN_SIZE_MIN; ++ + gen_eth_hdr_data(); + + for (i = 0; i < NUM_FRAMES; i++) diff --git a/patches.suse/sbitmap-add-helper-to-clear-a-batch-of-tags.patch b/patches.suse/sbitmap-add-helper-to-clear-a-batch-of-tags.patch new file mode 100644 index 0000000..3b17304 --- /dev/null +++ b/patches.suse/sbitmap-add-helper-to-clear-a-batch-of-tags.patch @@ -0,0 +1,104 @@ +From: Jens Axboe +Date: Fri, 8 Oct 2021 05:44:23 -0600 +Subject: [PATCH] sbitmap: add helper to clear a batch of tags +Git-commit: 1aec5e4a2962f7e0b3fb3e7308dd726be2472c26 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +sbitmap currently only supports clearing tags one-by-one, add a helper +that allows the caller to pass in an array of tags to clear. + +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + include/linux/sbitmap.h | 11 +++++++++++ + lib/sbitmap.c | 44 ++++++++++++++++++++++++++++++++++++++--- + 2 files changed, 52 insertions(+), 3 deletions(-) + +diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h +index e30b56023ead..4a6ff274335a 100644 +--- a/include/linux/sbitmap.h ++++ b/include/linux/sbitmap.h +@@ -528,6 +528,17 @@ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, + void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, + unsigned int cpu); + ++/** ++ * sbitmap_queue_clear_batch() - Free a batch of allocated bits ++ * &struct sbitmap_queue. ++ * @sbq: Bitmap to free from. ++ * @offset: offset for each tag in array ++ * @tags: array of tags ++ * @nr_tags: number of tags in array ++ */ ++void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset, ++ int *tags, int nr_tags); ++ + static inline int sbq_index_inc(int index) + { + return (index + 1) & (SBQ_WAIT_QUEUES - 1); +diff --git a/lib/sbitmap.c b/lib/sbitmap.c +index f398e0ae548e..c6e2f1f2c4d2 100644 +--- a/lib/sbitmap.c ++++ b/lib/sbitmap.c +@@ -628,6 +628,46 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) + } + EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); + ++static inline void sbitmap_update_cpu_hint(struct sbitmap *sb, int cpu, int tag) ++{ ++ if (likely(!sb->round_robin && tag < sb->depth)) ++ *per_cpu_ptr(sb->alloc_hint, cpu) = tag; ++} ++ ++void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset, ++ int *tags, int nr_tags) ++{ ++ struct sbitmap *sb = &sbq->sb; ++ unsigned long *addr = NULL; ++ unsigned long mask = 0; ++ int i; ++ ++ smp_mb__before_atomic(); ++ for (i = 0; i < nr_tags; i++) { ++ const int tag = tags[i] - offset; ++ unsigned long *this_addr; ++ ++ /* since we're clearing a batch, skip the deferred map */ ++ this_addr = &sb->map[SB_NR_TO_INDEX(sb, tag)].word; ++ if (!addr) { ++ addr = this_addr; ++ } else if (addr != this_addr) { ++ atomic_long_andnot(mask, (atomic_long_t *) addr); ++ mask = 0; ++ addr = this_addr; ++ } ++ mask |= (1UL << SB_NR_TO_BIT(sb, tag)); ++ } ++ ++ if (mask) ++ atomic_long_andnot(mask, (atomic_long_t *) addr); ++ ++ smp_mb__after_atomic(); ++ sbitmap_queue_wake_up(sbq); ++ sbitmap_update_cpu_hint(&sbq->sb, raw_smp_processor_id(), ++ tags[nr_tags - 1] - offset); ++} ++ + void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, + unsigned int cpu) + { +@@ -652,9 +692,7 @@ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, + */ + smp_mb__after_atomic(); + sbitmap_queue_wake_up(sbq); +- +- if (likely(!sbq->sb.round_robin && nr < sbq->sb.depth)) +- 
*per_cpu_ptr(sbq->sb.alloc_hint, cpu) = nr; ++ sbitmap_update_cpu_hint(&sbq->sb, cpu, nr); + } + EXPORT_SYMBOL_GPL(sbitmap_queue_clear); + +-- +2.35.3 + diff --git a/patches.suse/sbitmap-silence-data-race-warning.patch b/patches.suse/sbitmap-silence-data-race-warning.patch new file mode 100644 index 0000000..76b0330 --- /dev/null +++ b/patches.suse/sbitmap-silence-data-race-warning.patch @@ -0,0 +1,81 @@ +From: Jens Axboe +Date: Mon, 25 Oct 2021 10:45:01 -0600 +Subject: [PATCH] sbitmap: silence data race warning +Git-commit: 9f8b93a7df4d8e1e8715fb2a45a893cffad9da0b +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +KCSAN complaints about the sbitmap hint update: + +================================================================== +BUG: KCSAN: data-race in sbitmap_queue_clear / sbitmap_queue_clear + +write to 0xffffe8ffffd145b8 of 4 bytes by interrupt on cpu 1: + sbitmap_queue_clear+0xca/0xf0 lib/sbitmap.c:606 + blk_mq_put_tag+0x82/0x90 + __blk_mq_free_request+0x114/0x180 block/blk-mq.c:507 + blk_mq_free_request+0x2c8/0x340 block/blk-mq.c:541 + __blk_mq_end_request+0x214/0x230 block/blk-mq.c:565 + blk_mq_end_request+0x37/0x50 block/blk-mq.c:574 + lo_complete_rq+0xca/0x170 drivers/block/loop.c:541 + blk_complete_reqs block/blk-mq.c:584 [inline] + blk_done_softirq+0x69/0x90 block/blk-mq.c:589 + __do_softirq+0x12c/0x26e kernel/softirq.c:558 + run_ksoftirqd+0x13/0x20 kernel/softirq.c:920 + smpboot_thread_fn+0x22f/0x330 kernel/smpboot.c:164 + kthread+0x262/0x280 kernel/kthread.c:319 + ret_from_fork+0x1f/0x30 + +write to 0xffffe8ffffd145b8 of 4 bytes by interrupt on cpu 0: + sbitmap_queue_clear+0xca/0xf0 lib/sbitmap.c:606 + blk_mq_put_tag+0x82/0x90 + __blk_mq_free_request+0x114/0x180 block/blk-mq.c:507 + blk_mq_free_request+0x2c8/0x340 block/blk-mq.c:541 + __blk_mq_end_request+0x214/0x230 block/blk-mq.c:565 + blk_mq_end_request+0x37/0x50 block/blk-mq.c:574 + lo_complete_rq+0xca/0x170 drivers/block/loop.c:541 + blk_complete_reqs block/blk-mq.c:584 [inline] + blk_done_softirq+0x69/0x90 block/blk-mq.c:589 + __do_softirq+0x12c/0x26e kernel/softirq.c:558 + run_ksoftirqd+0x13/0x20 kernel/softirq.c:920 + smpboot_thread_fn+0x22f/0x330 kernel/smpboot.c:164 + kthread+0x262/0x280 kernel/kthread.c:319 + ret_from_fork+0x1f/0x30 + +value changed: 0x00000035 -> 0x00000044 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 0 PID: 10 Comm: ksoftirqd/0 Not tainted 5.15.0-rc6-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +================================================================== + +which is a data race, but not an important one. This is just updating the +percpu alloc hint, and the reader of that hint doesn't ever require it to +be valid. + +Just annotate it with data_race() to silence this one. 
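As a general pattern (a hedged sketch, not code from this patch; the names alloc_hint, hint_update, and hint_read are made up), data_race() from <linux/compiler.h> marks a plain access as intentionally racy, so KCSAN stops reporting it while still instrumenting everything else:

  #include <linux/compiler.h>

  /* Best-effort hint: a lost update is harmless, so the racy plain
   * accesses are deliberately annotated rather than locked. */
  static unsigned long alloc_hint;

  static void hint_update(unsigned long tag)
  {
          data_race(alloc_hint = tag);    /* racy write, by design */
  }

  static unsigned long hint_read(void)
  {
          return data_race(alloc_hint);   /* racy read, by design */
  }

By contrast, READ_ONCE()/WRITE_ONCE() are the right tool when a racing access must still happen exactly once and untorn; here the hint is purely advisory, so data_race() is the lighter and more precise annotation.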
+ +Reported-by: syzbot+4f8bfd804b4a1f95b8f6@syzkaller.appspotmail.com +Acked-by: Marco Elver +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + lib/sbitmap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/sbitmap.c b/lib/sbitmap.c +index c6e2f1f2c4d2..2709ab825499 100644 +--- a/lib/sbitmap.c ++++ b/lib/sbitmap.c +@@ -631,7 +631,7 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); + static inline void sbitmap_update_cpu_hint(struct sbitmap *sb, int cpu, int tag) + { + if (likely(!sb->round_robin && tag < sb->depth)) +- *per_cpu_ptr(sb->alloc_hint, cpu) = tag; ++ data_race(*per_cpu_ptr(sb->alloc_hint, cpu) = tag); + } + + void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset, +-- +2.35.3 + diff --git a/patches.suse/sched-make-task_struct-plug-always-defined.patch b/patches.suse/sched-make-task_struct-plug-always-defined.patch new file mode 100644 index 0000000..f3772a1 --- /dev/null +++ b/patches.suse/sched-make-task_struct-plug-always-defined.patch @@ -0,0 +1,51 @@ +From: Jens Axboe +Date: Fri, 22 Oct 2021 19:35:45 -0600 +Subject: [PATCH] sched: make task_struct->plug always defined +Git-commit: 599593a82fc57f5e9453c8ef7420df3206934a0c +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +If CONFIG_BLOCK isn't set, then it's an empty struct anyway. Just make +it generally available, so we don't break the compile: + +kernel/sched/core.c: In function ‘sched_submit_work’: +kernel/sched/core.c:6346:35: error: ‘struct task_struct’ has no member named ‘plug’ + 6346 | blk_flush_plug(tsk->plug, true); + | ^~ +kernel/sched/core.c: In function ‘io_schedule_prepare’: +kernel/sched/core.c:8357:20: error: ‘struct task_struct’ has no member named ‘plug’ + 8357 | if (current->plug) + | ^~ +kernel/sched/core.c:8358:39: error: ‘struct task_struct’ has no member named ‘plug’ + 8358 | blk_flush_plug(current->plug, true); + | ^~ + +Reported-by: Nathan Chancellor +Fixes: 008f75a20e70 ("block: cleanup the flush plug helpers") +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + include/linux/sched.h | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index c1a927ddec64..e0454e60fe8f 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1160,10 +1160,8 @@ struct task_struct { + /* Stacked block device info: */ + struct bio_list *bio_list; + +-#ifdef CONFIG_BLOCK + /* Stack plugging: */ + struct blk_plug *plug; +-#endif + + /* VM state: */ + struct reclaim_state *reclaim_state; +-- +2.35.3 + diff --git a/patches.suse/scsi-add-a-scsi_alloc_request-helper.patch b/patches.suse/scsi-add-a-scsi_alloc_request-helper.patch new file mode 100644 index 0000000..65e7e5a --- /dev/null +++ b/patches.suse/scsi-add-a-scsi_alloc_request-helper.patch @@ -0,0 +1,246 @@ +From: Christoph Hellwig +Date: Thu, 21 Oct 2021 08:06:05 +0200 +Subject: [PATCH] scsi: add a scsi_alloc_request helper +Git-commit: 68ec3b819a5d600a4ede8b596761dccac9f39ebc +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Add a new helper that calls blk_get_request and initializes the +scsi_request to avoid the indirect call through ->.initialize_rq_fn. + +Note that this makes the pktcdvd driver depend on the SCSI core, but +given that only SCSI devices support SCSI passthrough requests that +is not a functional change. 
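For orientation, a converted caller ends up with the shape below. This is only a sketch against the v5.15-era passthrough API this series assumes (sketch_test_unit_ready() is hypothetical, and blk_execute_rq()'s signature changed in nearby releases); it is not a hunk from the patch:

  #include <linux/blkdev.h>
  #include <linux/err.h>
  #include <scsi/scsi_cmnd.h>
  #include <scsi/scsi_common.h>
  #include <scsi/scsi_device.h>
  #include <scsi/scsi_proto.h>
  #include <scsi/scsi_request.h>

  /* Sketch: issue a TEST UNIT READY through the new helper. */
  static int sketch_test_unit_ready(struct scsi_device *sdev)
  {
          struct scsi_request *sreq;
          struct request *rq;

          rq = scsi_alloc_request(sdev->request_queue, REQ_OP_DRV_IN, 0);
          if (IS_ERR(rq))
                  return PTR_ERR(rq);

          sreq = scsi_req(rq);            /* already initialized by the helper */
          sreq->cmd[0] = TEST_UNIT_READY; /* build the CDB as usual */
          sreq->cmd_len = COMMAND_SIZE(TEST_UNIT_READY);
          rq->timeout = 30 * HZ;

          /* dispatch and wait; exact signature varies by kernel release */
          blk_execute_rq(NULL, rq, 0);
          blk_put_request(rq);
          return 0;
  }

The point of the helper is visible here: scsi_req(rq) is already set up on return, so the caller only fills in the CDB instead of relying on an indirect ->initialize_rq_fn callback.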
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Hannes Reinecke +Link: https://lore.kernel.org/r/20211021060607.264371-6-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/Kconfig | 2 +- + drivers/block/pktcdvd.c | 2 +- + drivers/scsi/scsi_bsg.c | 4 ++-- + drivers/scsi/scsi_error.c | 2 +- + drivers/scsi/scsi_ioctl.c | 4 ++-- + drivers/scsi/scsi_lib.c | 19 +++++++++++++------ + drivers/scsi/sg.c | 4 ++-- + drivers/scsi/sr.c | 2 +- + drivers/scsi/st.c | 2 +- + drivers/target/target_core_pscsi.c | 3 +-- + include/scsi/scsi_cmnd.h | 3 +++ + 11 files changed, 28 insertions(+), 19 deletions(-) + +diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig +index ab3e37aa1830..9151e8ffba1c 100644 +--- a/drivers/block/Kconfig ++++ b/drivers/block/Kconfig +@@ -304,8 +304,8 @@ config BLK_DEV_RAM_SIZE + config CDROM_PKTCDVD + tristate "Packet writing on CD/DVD media (DEPRECATED)" + depends on !UML ++ depends on SCSI + select CDROM +- select SCSI_COMMON + help + Note: This driver is deprecated and will be removed from the + kernel in the near future! +diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c +index cb52cce6fb03..ea2262ec76d2 100644 +--- a/drivers/block/pktcdvd.c ++++ b/drivers/block/pktcdvd.c +@@ -703,7 +703,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * + struct request *rq; + int ret = 0; + +- rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? ++ rq = scsi_alloc_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); + if (IS_ERR(rq)) + return PTR_ERR(rq); +diff --git a/drivers/scsi/scsi_bsg.c b/drivers/scsi/scsi_bsg.c +index 81c3853a2a80..551727a6f694 100644 +--- a/drivers/scsi/scsi_bsg.c ++++ b/drivers/scsi/scsi_bsg.c +@@ -25,8 +25,8 @@ static int scsi_bsg_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr, + return -EOPNOTSUPP; + } + +- rq = blk_get_request(q, hdr->dout_xfer_len ? +- REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); ++ rq = scsi_alloc_request(q, hdr->dout_xfer_len ? ++ REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); + if (IS_ERR(rq)) + return PTR_ERR(rq); + rq->timeout = timeout; +diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c +index b6c86cce57bf..71d027b94be4 100644 +--- a/drivers/scsi/scsi_error.c ++++ b/drivers/scsi/scsi_error.c +@@ -1998,7 +1998,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev) + struct request *req; + struct scsi_request *rq; + +- req = blk_get_request(sdev->request_queue, REQ_OP_DRV_IN, 0); ++ req = scsi_alloc_request(sdev->request_queue, REQ_OP_DRV_IN, 0); + if (IS_ERR(req)) + return; + rq = scsi_req(req); +diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c +index 6ff2207bd45a..0078975e3c07 100644 +--- a/drivers/scsi/scsi_ioctl.c ++++ b/drivers/scsi/scsi_ioctl.c +@@ -438,7 +438,7 @@ static int sg_io(struct scsi_device *sdev, struct gendisk *disk, + at_head = 1; + + ret = -ENOMEM; +- rq = blk_get_request(sdev->request_queue, writing ? ++ rq = scsi_alloc_request(sdev->request_queue, writing ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); + if (IS_ERR(rq)) + return PTR_ERR(rq); +@@ -561,7 +561,7 @@ static int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, + + } + +- rq = blk_get_request(q, in_len ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); ++ rq = scsi_alloc_request(q, in_len ? 
REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto error_free_buffer; +diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c +index 30f7d0b4eb73..a0f801fc8943 100644 +--- a/drivers/scsi/scsi_lib.c ++++ b/drivers/scsi/scsi_lib.c +@@ -216,7 +216,7 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, + struct scsi_request *rq; + int ret; + +- req = blk_get_request(sdev->request_queue, ++ req = scsi_alloc_request(sdev->request_queue, + data_direction == DMA_TO_DEVICE ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, + rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0); +@@ -1079,9 +1079,6 @@ EXPORT_SYMBOL(scsi_alloc_sgtables); + * This function initializes the members of struct scsi_cmnd that must be + * initialized before request processing starts and that won't be + * reinitialized if a SCSI command is requeued. +- * +- * Called from inside blk_get_request() for pass-through requests and from +- * inside scsi_init_command() for filesystem requests. + */ + static void scsi_initialize_rq(struct request *rq) + { +@@ -1098,6 +1095,18 @@ static void scsi_initialize_rq(struct request *rq) + cmd->retries = 0; + } + ++struct request *scsi_alloc_request(struct request_queue *q, ++ unsigned int op, blk_mq_req_flags_t flags) ++{ ++ struct request *rq; ++ ++ rq = blk_get_request(q, op, flags); ++ if (!IS_ERR(rq)) ++ scsi_initialize_rq(rq); ++ return rq; ++} ++EXPORT_SYMBOL_GPL(scsi_alloc_request); ++ + /* + * Only called when the request isn't completed by SCSI, and not freed by + * SCSI +@@ -1864,7 +1873,6 @@ static const struct blk_mq_ops scsi_mq_ops_no_commit = { + #endif + .init_request = scsi_mq_init_request, + .exit_request = scsi_mq_exit_request, +- .initialize_rq_fn = scsi_initialize_rq, + .cleanup_rq = scsi_cleanup_rq, + .busy = scsi_mq_lld_busy, + .map_queues = scsi_map_queues, +@@ -1894,7 +1902,6 @@ static const struct blk_mq_ops scsi_mq_ops = { + #endif + .init_request = scsi_mq_init_request, + .exit_request = scsi_mq_exit_request, +- .initialize_rq_fn = scsi_initialize_rq, + .cleanup_rq = scsi_cleanup_rq, + .busy = scsi_mq_lld_busy, + .map_queues = scsi_map_queues, +diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c +index 3c98f08dc25d..85f57ac0b844 100644 +--- a/drivers/scsi/sg.c ++++ b/drivers/scsi/sg.c +@@ -1718,13 +1718,13 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) + * + * With scsi-mq enabled, there are a fixed number of preallocated + * requests equal in number to shost->can_queue. If all of the +- * preallocated requests are already in use, then blk_get_request() ++ * preallocated requests are already in use, then scsi_alloc_request() + * will sleep until an active command completes, freeing up a request. + * Although waiting in an asynchronous interface is less than ideal, we + * do not want to use BLK_MQ_REQ_NOWAIT here because userspace might + * not expect an EWOULDBLOCK from this condition. + */ +- rq = blk_get_request(q, hp->dxfer_direction == SG_DXFER_TO_DEV ? ++ rq = scsi_alloc_request(q, hp->dxfer_direction == SG_DXFER_TO_DEV ? 
+ REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); + if (IS_ERR(rq)) { + kfree(long_cmdp); +diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c +index 115f7ef7a5de..7c4d9a964799 100644 +--- a/drivers/scsi/sr.c ++++ b/drivers/scsi/sr.c +@@ -967,7 +967,7 @@ static int sr_read_cdda_bpc(struct cdrom_device_info *cdi, void __user *ubuf, + struct bio *bio; + int ret; + +- rq = blk_get_request(disk->queue, REQ_OP_DRV_IN, 0); ++ rq = scsi_alloc_request(disk->queue, REQ_OP_DRV_IN, 0); + if (IS_ERR(rq)) + return PTR_ERR(rq); + req = scsi_req(rq); +diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c +index 9933722acfd9..1275299f6159 100644 +--- a/drivers/scsi/st.c ++++ b/drivers/scsi/st.c +@@ -543,7 +543,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, + int err = 0; + struct scsi_tape *STp = SRpnt->stp; + +- req = blk_get_request(SRpnt->stp->device->request_queue, ++ req = scsi_alloc_request(SRpnt->stp->device->request_queue, + data_direction == DMA_TO_DEVICE ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); + if (IS_ERR(req)) +diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c +index 75ef52f008ff..b5705a2bd761 100644 +--- a/drivers/target/target_core_pscsi.c ++++ b/drivers/target/target_core_pscsi.c +@@ -980,11 +980,10 @@ pscsi_execute_cmd(struct se_cmd *cmd) + memcpy(pt->pscsi_cdb, cmd->t_task_cdb, + scsi_command_size(cmd->t_task_cdb)); + +- req = blk_get_request(pdv->pdv_sd->request_queue, ++ req = scsi_alloc_request(pdv->pdv_sd->request_queue, + cmd->data_direction == DMA_TO_DEVICE ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); + if (IS_ERR(req)) { +- pr_err("PSCSI: blk_get_request() failed\n"); + ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + goto fail; + } +diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h +index eaf04c9a1dfc..31078063afac 100644 +--- a/include/scsi/scsi_cmnd.h ++++ b/include/scsi/scsi_cmnd.h +@@ -396,4 +396,7 @@ static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd) + extern void scsi_build_sense(struct scsi_cmnd *scmd, int desc, + u8 key, u8 asc, u8 ascq); + ++struct request *scsi_alloc_request(struct request_queue *q, ++ unsigned int op, blk_mq_req_flags_t flags); ++ + #endif /* _SCSI_SCSI_CMND_H */ +-- +2.35.3 + diff --git a/patches.suse/scsi-do-not-put-scsi_common-in-a-separate-module.patch b/patches.suse/scsi-do-not-put-scsi_common-in-a-separate-module.patch new file mode 100644 index 0000000..b7581df --- /dev/null +++ b/patches.suse/scsi-do-not-put-scsi_common-in-a-separate-module.patch @@ -0,0 +1,37 @@ +From: Hannes Reinecke +Date: Wed, 23 Nov 2022 13:54:24 +0100 +Subject: [PATCH] scsi: do not put scsi_common in a separate module +Patch-Mainline: submitted to linux-scsi 2022/11/24 +References: jsc#PED-1183 + +scsi_common.ko is a tiny module which is not shared with anything, +so include it in scsi_mod.ko like the rest of the files. 
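The underlying mechanism is ordinary kbuild composition. A hedged, generic sketch (CONFIG_FOO, foo.o, and the other names are hypothetical) looks like this:

  # Hypothetical sketch of the kbuild idiom used by this patch: build
  # one module, foo.ko, from several objects, folding optional parts in
  # per bool config symbol instead of emitting separate modules.
  obj-$(CONFIG_FOO)          += foo.o
  foo-y                      := foo_core.o foo_main.o
  foo-$(CONFIG_FOO_EXTRA)    += foo_extra.o   # needs CONFIG_FOO_EXTRA=y

The hunk below applies exactly this idiom, turning scsi_common.o into a conditional piece of scsi_mod.o via scsi_mod-$(CONFIG_SCSI_COMMON) instead of shipping a separate scsi_common.ko.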
+ +Signed-off-by: Hannes Reinecke +--- + drivers/scsi/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile +index f055bfd54a68..d9f5f27246b1 100644 +--- a/drivers/scsi/Makefile ++++ b/drivers/scsi/Makefile +@@ -20,7 +20,6 @@ CFLAGS_aha152x.o = -DAHA152X_STAT -DAUTOCONF + obj-$(CONFIG_PCMCIA) += pcmcia/ + + obj-$(CONFIG_SCSI) += scsi_mod.o +-obj-$(CONFIG_SCSI_COMMON) += scsi_common.o + + obj-$(CONFIG_RAID_ATTRS) += raid_class.o + +@@ -167,6 +166,7 @@ scsi_mod-y += scsi_trace.o scsi_logging.o + scsi_mod-$(CONFIG_PM) += scsi_pm.o + scsi_mod-$(CONFIG_SCSI_DH) += scsi_dh.o + scsi_mod-$(CONFIG_BLK_DEV_BSG) += scsi_bsg.o ++scsi_mod-$(CONFIG_SCSI_COMMON) += scsi_common.o + + hv_storvsc-y := storvsc_drv.o + +-- +2.35.3 + diff --git a/patches.suse/scsi-hpsa-Remove-an-unused-variable-in-hpsa_update_s.patch b/patches.suse/scsi-hpsa-Remove-an-unused-variable-in-hpsa_update_s.patch new file mode 100644 index 0000000..1f529b4 --- /dev/null +++ b/patches.suse/scsi-hpsa-Remove-an-unused-variable-in-hpsa_update_s.patch @@ -0,0 +1,43 @@ +From: Christophe JAILLET +Date: Thu, 9 Dec 2021 22:11:56 +0100 +Subject: scsi: hpsa: Remove an unused variable in hpsa_update_scsi_devices() +Patch-mainline: v5.17-rc1 +Git-commit: 8c2d04551545d3722c1e6891ecce46f44c5406ec +References: jsc#PED-1558 + +'lunzerobits' is unused. Remove it. + +This a left over of commit 2d62a33e05d4 ("hpsa: eliminate fake lun0 +enclosures") + +Link: https://lore.kernel.org/r/9f80ea569867b5f7ae1e0f99d656e5a8bacad34e.1639084205.git.christophe.jaillet@wanadoo.fr +Signed-off-by: Christophe JAILLET +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/hpsa.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c +index cdf3328cc065..a47bcce3c9c7 100644 +--- a/drivers/scsi/hpsa.c ++++ b/drivers/scsi/hpsa.c +@@ -4354,7 +4354,6 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h) + int i, ndevs_to_allocate; + int raid_ctlr_position; + bool physical_device; +- DECLARE_BITMAP(lunzerobits, MAX_EXT_TARGETS); + + currentsd = kcalloc(HPSA_MAX_DEVICES, sizeof(*currentsd), GFP_KERNEL); + physdev_list = kzalloc(sizeof(*physdev_list), GFP_KERNEL); +@@ -4368,7 +4367,6 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h) + dev_err(&h->pdev->dev, "out of memory\n"); + goto out; + } +- memset(lunzerobits, 0, sizeof(lunzerobits)); + + h->drv_req_rescan = 0; /* cancel scheduled rescan - we're doing it. */ + +-- +2.38.0 + diff --git a/patches.suse/scsi-hpsa-Simplify-clear-set-_bit-parameters.patch b/patches.suse/scsi-hpsa-Simplify-clear-set-_bit-parameters.patch new file mode 100644 index 0000000..2ce42b2 --- /dev/null +++ b/patches.suse/scsi-hpsa-Simplify-clear-set-_bit-parameters.patch @@ -0,0 +1,47 @@ +From: Christophe JAILLET +Date: Wed, 20 Jul 2022 20:14:02 +0200 +Subject: scsi: hpsa: Simplify {clear|set}_bit() parameters +Patch-mainline: v6.1-rc1 +Git-commit: e95b305addc976f1b163d1f5af063402d530a361 +References: jsc#PED-1558 + +{clear|set}_bit() can take an almost arbitrarily large bit number, so there +is no need to manually compute addresses. This is just redundant. + +Link: https://lore.kernel.org/r/c3429a22023f58e5e5cc65d6cd7e83fb2bd9b870.1658340442.git.christophe.jaillet@wanadoo.fr +Tested-by: Don Brace +Acked-by: Don Brace +Signed-off-by: Christophe JAILLET +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/hpsa.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c +index 0612ca681200..f8e832b1bc46 100644 +--- a/drivers/scsi/hpsa.c ++++ b/drivers/scsi/hpsa.c +@@ -6233,8 +6233,7 @@ static struct CommandList *cmd_alloc(struct ctlr_info *h) + offset = (i + 1) % HPSA_NRESERVED_CMDS; + continue; + } +- set_bit(i & (BITS_PER_LONG - 1), +- h->cmd_pool_bits + (i / BITS_PER_LONG)); ++ set_bit(i, h->cmd_pool_bits); + break; /* it's ours now. */ + } + hpsa_cmd_partial_init(h, i, c); +@@ -6261,8 +6260,7 @@ static void cmd_free(struct ctlr_info *h, struct CommandList *c) + int i; + + i = c - h->cmd_pool; +- clear_bit(i & (BITS_PER_LONG - 1), +- h->cmd_pool_bits + (i / BITS_PER_LONG)); ++ clear_bit(i, h->cmd_pool_bits); + } + } + +-- +2.38.0 + diff --git a/patches.suse/scsi-hpsa-Use-the-bitmap-API-to-allocate-bitmaps.patch b/patches.suse/scsi-hpsa-Use-the-bitmap-API-to-allocate-bitmaps.patch new file mode 100644 index 0000000..e75fbc7 --- /dev/null +++ b/patches.suse/scsi-hpsa-Use-the-bitmap-API-to-allocate-bitmaps.patch @@ -0,0 +1,47 @@ +From: Christophe JAILLET +Date: Wed, 20 Jul 2022 20:13:54 +0200 +Subject: scsi: hpsa: Use the bitmap API to allocate bitmaps +Patch-mainline: v6.1-rc1 +Git-commit: 5afdd990ce2ea178eb6cbd31b197cc3d12a675bf +References: jsc#PED-1558 + +Use bitmap_zalloc()/bitmap_free() instead of hand-writing them. It is less +verbose and it improves the semantic. + +Link: https://lore.kernel.org/r/5f975ef43f8b7306e4ac4e2e8ce4bcd53f6092bb.1658340441.git.christophe.jaillet@wanadoo.fr +Tested-by: Don Brace +Acked-by: Don Brace +Signed-off-by: Christophe JAILLET +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/hpsa.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c +index a47bcce3c9c7..0612ca681200 100644 +--- a/drivers/scsi/hpsa.c ++++ b/drivers/scsi/hpsa.c +@@ -8030,7 +8030,7 @@ static int hpsa_init_reset_devices(struct pci_dev *pdev, u32 board_id) + + static void hpsa_free_cmd_pool(struct ctlr_info *h) + { +- kfree(h->cmd_pool_bits); ++ bitmap_free(h->cmd_pool_bits); + h->cmd_pool_bits = NULL; + if (h->cmd_pool) { + dma_free_coherent(&h->pdev->dev, +@@ -8052,9 +8052,7 @@ static void hpsa_free_cmd_pool(struct ctlr_info *h) + + static int hpsa_alloc_cmd_pool(struct ctlr_info *h) + { +- h->cmd_pool_bits = kcalloc(DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG), +- sizeof(unsigned long), +- GFP_KERNEL); ++ h->cmd_pool_bits = bitmap_zalloc(h->nr_cmds, GFP_KERNEL); + h->cmd_pool = dma_alloc_coherent(&h->pdev->dev, + h->nr_cmds * sizeof(*h->cmd_pool), + &h->cmd_pool_dhandle, GFP_KERNEL); +-- +2.38.0 + diff --git a/patches.suse/scsi-megaraid-Convert-sysfs-snprintf-to-sysfs_emit.patch b/patches.suse/scsi-megaraid-Convert-sysfs-snprintf-to-sysfs_emit.patch new file mode 100644 index 0000000..cddbea3 --- /dev/null +++ b/patches.suse/scsi-megaraid-Convert-sysfs-snprintf-to-sysfs_emit.patch @@ -0,0 +1,38 @@ +From: Xuezhi Zhang +Date: Wed, 31 Aug 2022 22:03:25 +0800 +Subject: scsi: megaraid: Convert sysfs snprintf() to sysfs_emit() +Patch-mainline: v6.1-rc1 +Git-commit: 68a97feb4b501025540bc60c3d0824d66a508002 +References: jsc#PED-1490 + +Fix up sysfs show entries to use sysfs_emit() + +Link: https://lore.kernel.org/r/20220831140325.396295-1-zhangxuezhi3@gmail.com +Reviewed-by: Damien Le Moal +Signed-off-by: Xuezhi Zhang +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_mbox.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_mbox.c ++++ b/drivers/scsi/megaraid/megaraid_mbox.c +@@ -3979,7 +3979,7 @@ megaraid_mbox_app_hndl_show(struct devic + + app_hndl = mraid_mm_adapter_app_handle(adapter->unique_id); + +- return snprintf(buf, 8, "%u\n", app_hndl); ++ return sysfs_emit(buf, "%u\n", app_hndl); + } + + +@@ -4048,7 +4048,7 @@ megaraid_mbox_ld_show(struct device *dev + } + } + +- return snprintf(buf, 36, "%d %d %d %d\n", scsi_id, logical_drv, ++ return sysfs_emit(buf, "%d %d %d %d\n", scsi_id, logical_drv, + ldid_map, app_hndl); + } + diff --git a/patches.suse/scsi-megaraid-Fix-a-kernel-doc-warning.patch b/patches.suse/scsi-megaraid-Fix-a-kernel-doc-warning.patch new file mode 100644 index 0000000..6e5baf3 --- /dev/null +++ b/patches.suse/scsi-megaraid-Fix-a-kernel-doc-warning.patch @@ -0,0 +1,30 @@ +From: Bart Van Assche +Date: Mon, 29 Nov 2021 11:46:06 -0800 +Subject: scsi: megaraid: Fix a kernel-doc warning +Patch-mainline: v5.17-rc1 +Git-commit: acad9c4324992b6fcfe4f714a3b6f3a8cf8af929 +References: jsc#PED-1490 + +Fix the following kernel-doc warning: + +drivers/scsi/megaraid/megaraid_mbox.c:1439: warning: Excess function parameter 'done' description in 'megaraid_queue_command_lck' + +Link: https://lore.kernel.org/r/20211129194609.3466071-10-bvanassche@acm.org +Fixes: af049dfd0b10 ("scsi: core: Remove the 'done' argument from SCSI queuecommand_lck functions") +Signed-off-by: Bart Van Assche +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_mbox.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/scsi/megaraid/megaraid_mbox.c ++++ b/drivers/scsi/megaraid/megaraid_mbox.c +@@ -1431,7 +1431,6 @@ mbox_post_cmd(adapter_t *adapter, scb_t + /** + * megaraid_queue_command_lck - generic queue entry point for all LLDs + * @scp : pointer to the scsi command to be executed +- * @done : callback routine to be called after the cmd has be completed + * + * Queue entry point for mailbox based controllers. + */ diff --git a/patches.suse/scsi-megaraid-Remove-redundant-assignment-to-variabl.patch b/patches.suse/scsi-megaraid-Remove-redundant-assignment-to-variabl.patch new file mode 100644 index 0000000..602547c --- /dev/null +++ b/patches.suse/scsi-megaraid-Remove-redundant-assignment-to-variabl.patch @@ -0,0 +1,39 @@ +From: Colin Ian King +Date: Fri, 5 Aug 2022 12:50:42 +0100 +Subject: scsi: megaraid: Remove redundant assignment to variable mfiStatus +Patch-mainline: v6.1-rc1 +Git-commit: 4e62671a9a49c0f226311cbac08cdb64e905ab47 +References: jsc#PED-1490 + +The variable mfiStatus is assigned a value but it is never read. The +assignment is redundant and can be removed. Also remove { } as the return +statement does not need to be in its own code block. + +Cleans up clang scan build warning: + +drivers/scsi/megaraid/megaraid_sas_base.c:4026:7: warning: Although the +value stored to 'mfiStatus' is used in the enclosing expression, the +value is never actually read from 'mfiStatus' [deadcode.DeadStores] + +Link: https://lore.kernel.org/r/20220805115042.2340400-1-colin.i.king@gmail.com +Signed-off-by: Colin Ian King +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas_base.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -4023,10 +4023,8 @@ megasas_deplete_reply_queue(struct megas + u32 mfiStatus; + u32 fw_state; + +- if ((mfiStatus = instance->instancet->check_reset(instance, +- instance->reg_set)) == 1) { ++ if (instance->instancet->check_reset(instance, instance->reg_set) == 1) + return IRQ_HANDLED; +- } + + mfiStatus = instance->instancet->clear_intr(instance); + if (mfiStatus == 0) { diff --git a/patches.suse/scsi-megaraid-Remove-the-static-variable-initialisat.patch b/patches.suse/scsi-megaraid-Remove-the-static-variable-initialisat.patch new file mode 100644 index 0000000..bb6effe --- /dev/null +++ b/patches.suse/scsi-megaraid-Remove-the-static-variable-initialisat.patch @@ -0,0 +1,29 @@ +From: Jason Wang +Date: Sat, 23 Jul 2022 17:16:20 +0800 +Subject: scsi: megaraid: Remove the static variable initialisation +Patch-mainline: v6.0-rc1 +Git-commit: 68126eeb6df6cb53aae4dc450ec20792e1819861 +References: jsc#PED-1490 + +Initialising global and static variables to 0 is unnecessary. Remove the +initialisation. + +Link: https://lore.kernel.org/r/20220723091620.5463-1-wangborong@cdjrlc.com +Signed-off-by: Jason Wang +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_mbox.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/megaraid/megaraid_mbox.c ++++ b/drivers/scsi/megaraid/megaraid_mbox.c +@@ -181,7 +181,7 @@ MODULE_PARM_DESC(cmd_per_lun, + * This would result in non-disk devices being skipped during driver load + * time. These can be later added though, using /proc/scsi/scsi + */ +-static unsigned int megaraid_fast_load = 0; ++static unsigned int megaraid_fast_load; + module_param_named(fast_load, megaraid_fast_load, int, 0); + MODULE_PARM_DESC(fast_load, + "Faster loading of the driver, skips physical devices! (default=0)"); diff --git a/patches.suse/scsi-megaraid_sas-Clean-up-some-inconsistent-indenti.patch b/patches.suse/scsi-megaraid_sas-Clean-up-some-inconsistent-indenti.patch new file mode 100644 index 0000000..aa4a5be --- /dev/null +++ b/patches.suse/scsi-megaraid_sas-Clean-up-some-inconsistent-indenti.patch @@ -0,0 +1,33 @@ +From: Jiapeng Chong +Date: Thu, 30 Jun 2022 15:41:52 +0800 +Subject: scsi: megaraid_sas: Clean up some inconsistent indenting +Patch-mainline: v6.0-rc1 +Git-commit: acd1a2786c568ee7c254eda9c60eabc401d13c04 +References: jsc#PED-1490 + +This was found by coccicheck: + +drivers/scsi/megaraid/megaraid_sas_base.c:3950 process_fw_state_change_wq() warn: inconsistent indenting. + +Link: https://lore.kernel.org/r/20220630074152.29171-1-jiapeng.chong@linux.alibaba.com +Signed-off-by: Jiapeng Chong +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas_base.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -3950,9 +3950,9 @@ process_fw_state_change_wq(struct work_s + u32 wait; + unsigned long flags; + +- if (atomic_read(&instance->adprecovery) != MEGASAS_ADPRESET_SM_INFAULT) { ++ if (atomic_read(&instance->adprecovery) != MEGASAS_ADPRESET_SM_INFAULT) { + dev_notice(&instance->pdev->dev, "error, recovery st %x\n", +- atomic_read(&instance->adprecovery)); ++ atomic_read(&instance->adprecovery)); + return ; + } + diff --git a/patches.suse/scsi-megaraid_sas-Fix-double-kfree.patch b/patches.suse/scsi-megaraid_sas-Fix-double-kfree.patch new file mode 100644 index 0000000..47a4201 --- /dev/null +++ b/patches.suse/scsi-megaraid_sas-Fix-double-kfree.patch @@ -0,0 +1,29 @@ +From: Guixin Liu +Date: Tue, 2 Aug 2022 15:18:49 +0800 +Subject: scsi: megaraid_sas: Fix double kfree() +Patch-mainline: v6.0-rc3 +Git-commit: 8c499e49240bd93628368c3588975cfb94169b8b +References: jsc#PED-1490 + +When allocating log_to_span fails, kfree(instance->ctrl_context) is called +twice. Remove redundant call. + +Link: https://lore.kernel.org/r/1659424729-46502-1-git-send-email-kanie@linux.alibaba.com +Acked-by: Sumit Saxena +Signed-off-by: Guixin Liu +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas_fusion.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c ++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c +@@ -5310,7 +5310,6 @@ megasas_alloc_fusion_context(struct mega + if (!fusion->log_to_span) { + dev_err(&instance->pdev->dev, "Failed from %s %d\n", + __func__, __LINE__); +- kfree(instance->ctrl_context); + return -ENOMEM; + } + } diff --git a/patches.suse/scsi-megaraid_sas-Remove-redundant-memset-statement.patch b/patches.suse/scsi-megaraid_sas-Remove-redundant-memset-statement.patch new file mode 100644 index 0000000..cc68237 --- /dev/null +++ b/patches.suse/scsi-megaraid_sas-Remove-redundant-memset-statement.patch @@ -0,0 +1,29 @@ +From: Harshit Mogalapalli +Date: Thu, 5 May 2022 07:32:13 -0700 +Subject: scsi: megaraid_sas: Remove redundant memset() statement +Patch-mainline: v5.19-rc1 +Git-commit: 2f9e9a7b0ce38471c6f22a762adca6a977edfa7d +References: jsc#PED-1490 + +As memset() of scmd->sense_buffer is immediately followed by a memcpy() +where scmd->sense_buffer is the destination. The memset() is redundant. + +Link: https://lore.kernel.org/r/20220505143214.44908-1-harshit.m.mogalapalli@oracle.com +Signed-off-by: Harshit Mogalapalli +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c ++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c +@@ -2047,8 +2047,6 @@ map_cmd_status(struct fusion_context *fu + + scmd->result = (DID_OK << 16) | ext_status; + if (ext_status == SAM_STAT_CHECK_CONDITION) { +- memset(scmd->sense_buffer, 0, +- SCSI_SENSE_BUFFERSIZE); + memcpy(scmd->sense_buffer, sense, + SCSI_SENSE_BUFFERSIZE); + } diff --git a/patches.suse/scsi-megaraid_sas-Remove-redundant-variable-cmd_type.patch b/patches.suse/scsi-megaraid_sas-Remove-redundant-variable-cmd_type.patch new file mode 100644 index 0000000..fc2efb4 --- /dev/null +++ b/patches.suse/scsi-megaraid_sas-Remove-redundant-variable-cmd_type.patch @@ -0,0 +1,43 @@ +From: Colin Ian King +Date: Sat, 30 Jul 2022 13:45:09 +0100 +Subject: scsi: megaraid_sas: Remove redundant variable cmd_type +Patch-mainline: v6.0-rc1 +Git-commit: 6464d5b8a2768e8ff63d24b76299fe614e205aa7 +References: jsc#PED-1490 + +The variable cmd_type is assigned a value but it is never read. The +variable and the assignment are redundant and can be removed. + +Cleans up clang scan build warning: + + drivers/scsi/megaraid/megaraid_sas_fusion.c:3228:10: warning: Although + the value stored to 'cmd_type' is used in the enclosing expression, the + value is never actually read from 'cmd_type' [deadcode.DeadStores] + +Link: https://lore.kernel.org/r/20220730124509.148457-1-colin.i.king@gmail.com +Signed-off-by: Colin Ian King +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas_fusion.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c ++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c +@@ -3199,7 +3199,6 @@ megasas_build_io_fusion(struct megasas_i + struct megasas_cmd_fusion *cmd) + { + int sge_count; +- u8 cmd_type; + u16 pd_index = 0; + u8 drive_type = 0; + struct MPI2_RAID_SCSI_IO_REQUEST *io_request = cmd->io_request; +@@ -3225,7 +3224,7 @@ megasas_build_io_fusion(struct megasas_i + */ + io_request->IoFlags = cpu_to_le16(scp->cmd_len); + +- switch (cmd_type = megasas_cmd_type(scp)) { ++ switch (megasas_cmd_type(scp)) { + case READ_WRITE_LDIO: + megasas_build_ldio_fusion(instance, scp, cmd); + break; diff --git a/patches.suse/scsi-megaraid_sas-Remove-unnecessary-kfree.patch b/patches.suse/scsi-megaraid_sas-Remove-unnecessary-kfree.patch new file mode 100644 index 0000000..6926400 --- /dev/null +++ b/patches.suse/scsi-megaraid_sas-Remove-unnecessary-kfree.patch @@ -0,0 +1,46 @@ +From: Guixin Liu +Date: Tue, 2 Aug 2022 15:19:00 +0800 +Subject: scsi: megaraid_sas: Remove unnecessary kfree() +Patch-mainline: v6.0-rc3 +Git-commit: 7dd6f4af9482c319fa829583799e63e38967177d +References: jsc#PED-1490 + +When alloc ctrl mem fails, the reply_map will subsequently be freed in +megasas_free_ctrl_mem(). No need to free it in megasas_alloc_ctrl_mem(). + +Link: https://lore.kernel.org/r/1659424740-46918-1-git-send-email-kanie@linux.alibaba.com +Acked-by: Sumit Saxena +Signed-off-by: Guixin Liu +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas_base.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -7153,22 +7153,18 @@ static int megasas_alloc_ctrl_mem(struct + switch (instance->adapter_type) { + case MFI_SERIES: + if (megasas_alloc_mfi_ctrl_mem(instance)) +- goto fail; ++ return -ENOMEM; + break; + case AERO_SERIES: + case VENTURA_SERIES: + case THUNDERBOLT_SERIES: + case INVADER_SERIES: + if (megasas_alloc_fusion_context(instance)) +- goto fail; ++ return -ENOMEM; + break; + } + + return 0; +- fail: +- kfree(instance->reply_map); +- instance->reply_map = NULL; +- return -ENOMEM; + } + + /* diff --git a/patches.suse/scsi-megaraid_sas-Remove-unnecessary-memset.patch b/patches.suse/scsi-megaraid_sas-Remove-unnecessary-memset.patch new file mode 100644 index 0000000..b975457 --- /dev/null +++ b/patches.suse/scsi-megaraid_sas-Remove-unnecessary-memset.patch @@ -0,0 +1,30 @@ +From: Wan Jiabing +Date: Thu, 7 Apr 2022 15:24:42 +0800 +Subject: scsi: megaraid_sas: Remove unnecessary memset +Patch-mainline: v5.19-rc1 +Git-commit: 3a6a7187f09a0b1add76aaf4015f215a381ab616 +References: jsc#PED-1490 + +instance->cmd_list is allocated by kcalloc(). The memory is already set to +zero. It is unnecessary to call memset again. + +Link: https://lore.kernel.org/r/20220407072442.4137977-1-wanjiabing@vivo.com +Acked-by: Sumit Saxena +Signed-off-by: Wan Jiabing +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas_base.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -4473,8 +4473,6 @@ int megasas_alloc_cmds(struct megasas_in + return -ENOMEM; + } + +- memset(instance->cmd_list, 0, sizeof(struct megasas_cmd *) *max_cmd); +- + for (i = 0; i < max_cmd; i++) { + instance->cmd_list[i] = kmalloc(sizeof(struct megasas_cmd), + GFP_KERNEL); diff --git a/patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle-204a29a1.patch b/patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle-204a29a1.patch new file mode 100644 index 0000000..baf0137 --- /dev/null +++ b/patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle-204a29a1.patch @@ -0,0 +1,37 @@ +From: "Gustavo A. R. Silva" +Date: Mon, 15 Aug 2022 16:42:21 -0500 +Subject: scsi: megaraid_sas: Replace one-element array with flexible-array + member in MR_FW_RAID_MAP_DYNAMIC +Patch-mainline: v6.1-rc1 +Git-commit: 204a29a169f4c80aa3b7feb41c45cbd6833aba21 +References: jsc#PED-1490 + +One-element arrays are deprecated, and we are replacing them with flexible +array members instead. So, replace one-element array with flexible-array +member in struct MR_FW_RAID_MAP_DYNAMIC. + +This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines +on memcpy(). + +Link: https://github.com/KSPP/linux/issues/79 +Link: https://github.com/KSPP/linux/issues/109 +Link: https://lore.kernel.org/r/896476f8fe43cf83b491c6c13f59c9ace780d82c.1660592640.git.gustavoars@kernel.org +Reviewed-by: Kees Cook +Signed-off-by: Gustavo A. R. Silva +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas_fusion.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_fusion.h ++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h +@@ -1053,7 +1053,7 @@ struct MR_FW_RAID_MAP_DYNAMIC { + struct MR_RAID_MAP_DESC_TABLE + raid_map_desc_table[RAID_MAP_DESC_TYPE_COUNT]; + /* Variable Size buffer containing all data */ +- u32 raid_map_desc_data[1]; ++ u32 raid_map_desc_data[]; + }; /* Dynamicaly sized RAID MAp structure */ + + #define IEEE_SGE_FLAGS_ADDR_MASK (0x03) diff --git a/patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle-ee92366a.patch b/patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle-ee92366a.patch new file mode 100644 index 0000000..0f9f667 --- /dev/null +++ b/patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle-ee92366a.patch @@ -0,0 +1,73 @@ +From: "Gustavo A. R. Silva" +Date: Mon, 15 Aug 2022 16:49:38 -0500 +Subject: scsi: megaraid_sas: Replace one-element array with flexible-array + member in MR_PD_CFG_SEQ_NUM_SYNC +Patch-mainline: v6.1-rc1 +Git-commit: ee92366a8439856136368a106e6e08ffa8306a1e +References: jsc#PED-1490 + +One-element arrays are deprecated, and we are replacing them with flexible +array members instead. So, replace one-element array with flexible-array +member in struct MR_PD_CFG_SEQ_NUM_SYNC and refactor the rest of the code +accordingly. + +This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines +on memcpy() and help us make progress towards globally enabling +-fstrict-flex-arrays [0]. + +Link: https://github.com/KSPP/linux/issues/79 +Link: https://github.com/KSPP/linux/issues/109 +Link: https://reviews.llvm.org/D126864 [0] +Link: https://lore.kernel.org/r/78e9261591db072b67fcf49f0216d7046a67ca6d.1660592640.git.gustavoars@kernel.org +Reviewed-by: Kees Cook +Signed-off-by: Gustavo A. R. Silva +Signed-off-by: Martin K.
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas_base.c | 4 ++-- + drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +- + drivers/scsi/megaraid/megaraid_sas_fusion.h | 2 +- + 3 files changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -5793,7 +5793,7 @@ megasas_setup_jbod_map(struct megasas_in + u32 pd_seq_map_sz; + + pd_seq_map_sz = sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) + +- (sizeof(struct MR_PD_CFG_SEQ) * (MAX_PHYSICAL_DEVICES - 1)); ++ (sizeof(struct MR_PD_CFG_SEQ) * MAX_PHYSICAL_DEVICES); + + instance->use_seqnum_jbod_fp = + instance->support_seqnum_jbod_fp; +@@ -8042,7 +8042,7 @@ skip_firing_dcmds: + megasas_release_fusion(instance); + pd_seq_map_sz = sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) + + (sizeof(struct MR_PD_CFG_SEQ) * +- (MAX_PHYSICAL_DEVICES - 1)); ++ MAX_PHYSICAL_DEVICES); + for (i = 0; i < 2 ; i++) { + if (fusion->ld_map[i]) + dma_free_coherent(&instance->pdev->dev, +--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c ++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c +@@ -1310,7 +1310,7 @@ megasas_sync_pd_seq_num(struct megasas_i + + pd_sync = (void *)fusion->pd_seq_sync[(instance->pd_seq_map_id & 1)]; + pd_seq_h = fusion->pd_seq_phys[(instance->pd_seq_map_id & 1)]; +- pd_seq_map_sz = struct_size(pd_sync, seq, MAX_PHYSICAL_DEVICES - 1); ++ pd_seq_map_sz = struct_size(pd_sync, seq, MAX_PHYSICAL_DEVICES); + + cmd = megasas_get_cmd(instance); + if (!cmd) { +--- a/drivers/scsi/megaraid/megaraid_sas_fusion.h ++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h +@@ -1249,7 +1249,7 @@ struct MR_PD_CFG_SEQ { + struct MR_PD_CFG_SEQ_NUM_SYNC { + __le32 size; + __le32 count; +- struct MR_PD_CFG_SEQ seq[1]; ++ struct MR_PD_CFG_SEQ seq[]; + } __packed; + + /* stream detection */ diff --git a/patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle-eeb3bab7.patch b/patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle-eeb3bab7.patch new file mode 100644 index 0000000..713f8e3 --- /dev/null +++ b/patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle-eeb3bab7.patch @@ -0,0 +1,47 @@ +From: "Gustavo A. R. Silva" +Date: Mon, 15 Aug 2022 16:46:13 -0500 +Subject: scsi: megaraid_sas: Replace one-element array with flexible-array + member in MR_DRV_RAID_MAP +Patch-mainline: v6.1-rc1 +Git-commit: eeb3bab77244b8d91e4e9b611177cd1196900163 +References: jsc#PED-1490 + +One-element arrays are deprecated, and we are replacing them with flexible +array members instead. So, replace one-element array with flexible-array +member in struct MR_DRV_RAID_MAP and refactor the code accordingly. + +This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines +on memcpy(). + +Link: https://github.com/KSPP/linux/issues/79 +Link: https://github.com/KSPP/linux/issues/109 +Link: https://lore.kernel.org/r/1448f387821833726b99f0ce13069ada89164eb5.1660592640.git.gustavoars@kernel.org +Reviewed-by: Kees Cook +Signed-off-by: Gustavo A. R. Silva +Signed-off-by: Martin K. 
Petersen +Enhanced-by: Kees Cook # Change in struct MR_DRV_RAID_MAP_ALL +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas_fusion.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_fusion.h ++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h +@@ -1182,7 +1182,7 @@ struct MR_DRV_RAID_MAP { + devHndlInfo[MAX_RAIDMAP_PHYSICAL_DEVICES_DYN]; + u16 ldTgtIdToLd[MAX_LOGICAL_DRIVES_DYN]; + struct MR_ARRAY_INFO arMapInfo[MAX_API_ARRAYS_DYN]; +- struct MR_LD_SPAN_MAP ldSpanMap[1]; ++ struct MR_LD_SPAN_MAP ldSpanMap[]; + + }; + +@@ -1193,7 +1193,7 @@ struct MR_DRV_RAID_MAP { + struct MR_DRV_RAID_MAP_ALL { + + struct MR_DRV_RAID_MAP raidMap; +- struct MR_LD_SPAN_MAP ldSpanMap[MAX_LOGICAL_DRIVES_DYN - 1]; ++ struct MR_LD_SPAN_MAP ldSpanMap[MAX_LOGICAL_DRIVES_DYN]; + } __packed; + + diff --git a/patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle.patch b/patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle.patch new file mode 100644 index 0000000..1e7da58 --- /dev/null +++ b/patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle.patch @@ -0,0 +1,72 @@ +From: "Gustavo A. R. Silva" +Date: Mon, 15 Aug 2022 16:40:35 -0500 +Subject: scsi: megaraid_sas: Replace one-element array with flexible-array + member in MR_FW_RAID_MAP +Patch-mainline: v6.1-rc1 +Git-commit: ac23b92b27e32f1ff331350342ce9f2ee9d0ab0f +References: jsc#PED-1490 + +One-element arrays are deprecated, and we are replacing them with flexible +array members instead. So, replace one-element array with flexible-array +member in struct MR_FW_RAID_MAP and refactor the rest of the code +accordingly. + +This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines +on memcpy(). + +Link: https://github.com/KSPP/linux/issues/79 +Link: https://github.com/KSPP/linux/issues/109 +Link: https://lore.kernel.org/r/4495ce170c8ef088a10f1abe0e7c227368f43242.1660592640.git.gustavoars@kernel.org +Reviewed-by: Kees Cook +Signed-off-by: Gustavo A. R. Silva +Signed-off-by: Martin K.
Petersen +Enhanced-by: Kees Cook # Change in struct MR_FW_RAID_MAP_ALL +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas_base.c | 2 +- + drivers/scsi/megaraid/megaraid_sas_fp.c | 2 +- + drivers/scsi/megaraid/megaraid_sas_fusion.h | 4 ++-- + 3 files changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -5157,7 +5157,7 @@ static void megasas_update_ext_vd_detail + } else { + fusion->old_map_sz = sizeof(struct MR_FW_RAID_MAP) + + (sizeof(struct MR_LD_SPAN_MAP) * +- (instance->fw_supported_vd_count - 1)); ++ instance->fw_supported_vd_count); + fusion->new_map_sz = sizeof(struct MR_FW_RAID_MAP_EXT); + + fusion->max_map_sz = +--- a/drivers/scsi/megaraid/megaraid_sas_fp.c ++++ b/drivers/scsi/megaraid/megaraid_sas_fp.c +@@ -327,7 +327,7 @@ u8 MR_ValidateMapInfo(struct megasas_ins + expected_size = sizeof(struct MR_FW_RAID_MAP_EXT); + else + expected_size = +- (sizeof(struct MR_FW_RAID_MAP) - sizeof(struct MR_LD_SPAN_MAP) + ++ (sizeof(struct MR_FW_RAID_MAP) + + (sizeof(struct MR_LD_SPAN_MAP) * le16_to_cpu(pDrvRaidMap->ldCount))); + + if (le32_to_cpu(pDrvRaidMap->totalSize) != expected_size) { +--- a/drivers/scsi/megaraid/megaraid_sas_fusion.h ++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h +@@ -942,7 +942,7 @@ struct MR_FW_RAID_MAP { + u8 reserved2[7]; + struct MR_ARRAY_INFO arMapInfo[MAX_RAIDMAP_ARRAYS]; + struct MR_DEV_HANDLE_INFO devHndlInfo[MAX_RAIDMAP_PHYSICAL_DEVICES]; +- struct MR_LD_SPAN_MAP ldSpanMap[1]; ++ struct MR_LD_SPAN_MAP ldSpanMap[]; + }; + + struct IO_REQUEST_INFO { +@@ -1148,7 +1148,7 @@ typedef struct LOG_BLOCK_SPAN_INFO { + + struct MR_FW_RAID_MAP_ALL { + struct MR_FW_RAID_MAP raidMap; +- struct MR_LD_SPAN_MAP ldSpanMap[MAX_LOGICAL_DRIVES - 1]; ++ struct MR_LD_SPAN_MAP ldSpanMap[MAX_LOGICAL_DRIVES]; + } __attribute__ ((packed)); + + struct MR_DRV_RAID_MAP { diff --git a/patches.suse/scsi-megaraid_sas-Target-with-invalid-LUN-ID-is-dele.patch b/patches.suse/scsi-megaraid_sas-Target-with-invalid-LUN-ID-is-dele.patch new file mode 100644 index 0000000..a41f555 --- /dev/null +++ b/patches.suse/scsi-megaraid_sas-Target-with-invalid-LUN-ID-is-dele.patch @@ -0,0 +1,59 @@ +From: Chandrakanth patil +Date: Thu, 24 Mar 2022 02:47:11 -0700 +Subject: scsi: megaraid_sas: Target with invalid LUN ID is deleted during scan +Patch-mainline: v5.18-rc2 +Git-commit: 56495f295d8e021f77d065b890fc0100e3f9f6d8 +References: jsc#PED-1490 + +The megaraid_sas driver supports single LUN for RAID devices. That is LUN +0. All other LUNs are unsupported. When a device scan on a logical target +with invalid LUN number is invoked through sysfs, that target ends up +getting removed. + +Add LUN ID validation in the slave destroy function to avoid the target +deletion. + +Link: https://lore.kernel.org/r/20220324094711.48833-1-chandrakanth.patil@broadcom.com +Signed-off-by: Chandrakanth patil +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas.h | 3 +++ + drivers/scsi/megaraid/megaraid_sas_base.c | 7 +++++++ + 2 files changed, 10 insertions(+) + +--- a/drivers/scsi/megaraid/megaraid_sas.h ++++ b/drivers/scsi/megaraid/megaraid_sas.h +@@ -2560,6 +2560,9 @@ struct megasas_instance_template { + #define MEGASAS_IS_LOGICAL(sdev) \ + ((sdev->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1) + ++#define MEGASAS_IS_LUN_VALID(sdev) \ ++ (((sdev)->lun == 0) ? 
1 : 0) ++ + #define MEGASAS_DEV_INDEX(scp) \ + (((scp->device->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) + \ + scp->device->id) +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -2126,6 +2126,9 @@ static int megasas_slave_alloc(struct sc + goto scan_target; + } + return -ENXIO; ++ } else if (!MEGASAS_IS_LUN_VALID(sdev)) { ++ sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__); ++ return -ENXIO; + } + + scan_target: +@@ -2156,6 +2159,10 @@ static void megasas_slave_destroy(struct + instance = megasas_lookup_instance(sdev->host->host_no); + + if (MEGASAS_IS_LOGICAL(sdev)) { ++ if (!MEGASAS_IS_LUN_VALID(sdev)) { ++ sdev_printk(KERN_INFO, sdev, "%s: invalid LUN\n", __func__); ++ return; ++ } + ld_tgt_id = MEGASAS_TARGET_ID(sdev); + instance->ld_tgtid_status[ld_tgt_id] = LD_TARGET_ID_DELETED; + if (megasas_dbg_lvl & LD_PD_DEBUG) diff --git a/patches.suse/scsi-megaraid_sas-Use-irq_set_affinity_and_hint.patch b/patches.suse/scsi-megaraid_sas-Use-irq_set_affinity_and_hint.patch new file mode 100644 index 0000000..a69bbb2 --- /dev/null +++ b/patches.suse/scsi-megaraid_sas-Use-irq_set_affinity_and_hint.patch @@ -0,0 +1,104 @@ +From: Nitesh Narayan Lal +Date: Fri, 3 Sep 2021 11:24:20 -0400 +Subject: scsi: megaraid_sas: Use irq_set_affinity_and_hint() +Patch-mainline: v5.17-rc1 +Git-commit: 8049da6f3943d0ac51931b8064b2e4769a69a967 +References: jsc#PED-1490 + +The driver uses irq_set_affinity_hint() specifically for the high IOPS +queue interrupts for two purposes: + + - To set the affinity_hint which is consumed by the userspace for + distributing the interrupts + + - To apply an affinity that it provides + +The driver enforces its own affinity to bind the high IOPS queue interrupts +to the local NUMA node. However, irq_set_affinity_hint() applying the +provided cpumask as an affinity for the interrupt is an undocumented side +effect. + +To remove this side effect irq_set_affinity_hint() has been marked +as deprecated and new interfaces have been introduced. Hence, replace the +irq_set_affinity_hint() with the new interface irq_set_affinity_and_hint() +where the provided mask needs to be applied as the affinity and +affinity_hint pointer needs to be set and replace with +irq_update_affinity_hint() where only affinity_hint needs to be updated. + +Change the megasas_set_high_iops_queue_affinity_hint function name to +megasas_set_high_iops_queue_affinity_and_hint to clearly indicate that the +function is setting both affinity and affinity_hint. 
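+
+A minimal sketch of the resulting usage (illustrative only, not part of the
+upstream patch; "pdev" and "i" stand for the driver's PCI device and vector
+index as used in the hunks below):
+
+	/* setup: apply the local NUMA mask as the affinity and publish
+	 * the same mask as the affinity_hint in one call
+	 */
+	irq_set_affinity_and_hint(pci_irq_vector(pdev, i),
+			cpumask_of_node(dev_to_node(&pdev->dev)));
+
+	/* teardown: only clear the published hint, leave affinity alone */
+	irq_update_affinity_hint(pci_irq_vector(pdev, i), NULL);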
+ +Signed-off-by: Nitesh Narayan Lal +Signed-off-by: Thomas Gleixner +Acked-by: Sumit Saxena +Link: https://lore.kernel.org/r/20210903152430.244937-5-nitesh@redhat.com +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas_base.c | 27 +++++++++++++++------------ + 1 file changed, 15 insertions(+), 12 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -5720,7 +5720,7 @@ megasas_setup_irqs_msix(struct megasas_i + "Failed to register IRQ for vector %d.\n", i); + for (j = 0; j < i; j++) { + if (j < instance->low_latency_index_start) +- irq_set_affinity_hint( ++ irq_update_affinity_hint( + pci_irq_vector(pdev, j), NULL); + free_irq(pci_irq_vector(pdev, j), + &instance->irq_context[j]); +@@ -5763,7 +5763,7 @@ megasas_destroy_irqs(struct megasas_inst + if (instance->msix_vectors) + for (i = 0; i < instance->msix_vectors; i++) { + if (i < instance->low_latency_index_start) +- irq_set_affinity_hint( ++ irq_update_affinity_hint( + pci_irq_vector(instance->pdev, i), NULL); + free_irq(pci_irq_vector(instance->pdev, i), + &instance->irq_context[i]); +@@ -5894,22 +5894,25 @@ int megasas_get_device_list(struct megas + } + + /** +- * megasas_set_high_iops_queue_affinity_hint - Set affinity hint for high IOPS queues +- * @instance: Adapter soft state +- * return: void ++ * megasas_set_high_iops_queue_affinity_and_hint - Set affinity and hint ++ * for high IOPS queues ++ * @instance: Adapter soft state ++ * return: void + */ + static inline void +-megasas_set_high_iops_queue_affinity_hint(struct megasas_instance *instance) ++megasas_set_high_iops_queue_affinity_and_hint(struct megasas_instance *instance) + { + int i; +- int local_numa_node; ++ unsigned int irq; ++ const struct cpumask *mask; + + if (instance->perf_mode == MR_BALANCED_PERF_MODE) { +- local_numa_node = dev_to_node(&instance->pdev->dev); ++ mask = cpumask_of_node(dev_to_node(&instance->pdev->dev)); + +- for (i = 0; i < instance->low_latency_index_start; i++) +- irq_set_affinity_hint(pci_irq_vector(instance->pdev, i), +- cpumask_of_node(local_numa_node)); ++ for (i = 0; i < instance->low_latency_index_start; i++) { ++ irq = pci_irq_vector(instance->pdev, i); ++ irq_set_affinity_and_hint(irq, mask); ++ } + } + } + +@@ -5998,7 +6001,7 @@ megasas_alloc_irq_vectors(struct megasas + instance->msix_vectors = 0; + + if (instance->smp_affinity_enable) +- megasas_set_high_iops_queue_affinity_hint(instance); ++ megasas_set_high_iops_queue_affinity_and_hint(instance); + } + + /** diff --git a/patches.suse/scsi-megaraid_sas-Use-struct_size-in-code-related-to-48658213.patch b/patches.suse/scsi-megaraid_sas-Use-struct_size-in-code-related-to-48658213.patch new file mode 100644 index 0000000..48c25b0 --- /dev/null +++ b/patches.suse/scsi-megaraid_sas-Use-struct_size-in-code-related-to-48658213.patch @@ -0,0 +1,63 @@ +From: "Gustavo A. R. Silva" +Date: Mon, 15 Aug 2022 16:52:33 -0500 +Subject: scsi: megaraid_sas: Use struct_size() in code related to struct + MR_PD_CFG_SEQ_NUM_SYNC +Patch-mainline: v6.1-rc1 +Git-commit: 48658213202c4f48ef34b43b9b6f60af8b67fb8a +References: jsc#PED-1490 + +Prefer struct_size() over open-coded versions of idiom: + + sizeof(struct-with-flex-array) + sizeof(type-of-flex-array) * count + +where count is the max number of items the flexible array is supposed to +have. 
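+
+For illustration (a sketch, not part of the upstream commit message), with
+the flexible-array structure introduced earlier in this series:
+
+	struct MR_PD_CFG_SEQ_NUM_SYNC {
+		__le32 size;
+		__le32 count;
+		struct MR_PD_CFG_SEQ seq[];
+	} __packed;
+
+the open-coded size computation
+
+	sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) +
+		(sizeof(struct MR_PD_CFG_SEQ) * MAX_PHYSICAL_DEVICES)
+
+becomes
+
+	struct_size((struct MR_PD_CFG_SEQ_NUM_SYNC *)0, seq,
+		    MAX_PHYSICAL_DEVICES);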
+ +Link: https://github.com/KSPP/linux/issues/160 +Link: https://lore.kernel.org/r/b215f4760f0e8fbe5fc35be20f2487e89924424d.1660592640.git.gustavoars@kernel.org +Reviewed-by: Kees Cook +Signed-off-by: Gustavo A. R. Silva +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas_base.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -5790,10 +5790,10 @@ megasas_setup_jbod_map(struct megasas_in + { + int i; + struct fusion_context *fusion = instance->ctrl_context; +- u32 pd_seq_map_sz; ++ size_t pd_seq_map_sz; + +- pd_seq_map_sz = sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) + +- (sizeof(struct MR_PD_CFG_SEQ) * MAX_PHYSICAL_DEVICES); ++ pd_seq_map_sz = struct_size((struct MR_PD_CFG_SEQ_NUM_SYNC *)0, seq, ++ MAX_PHYSICAL_DEVICES); + + instance->use_seqnum_jbod_fp = + instance->support_seqnum_jbod_fp; +@@ -7968,7 +7968,7 @@ static void megasas_detach_one(struct pc + struct Scsi_Host *host; + struct megasas_instance *instance; + struct fusion_context *fusion; +- u32 pd_seq_map_sz; ++ size_t pd_seq_map_sz; + + instance = pci_get_drvdata(pdev); + +@@ -8040,9 +8040,9 @@ skip_firing_dcmds: + + if (instance->adapter_type != MFI_SERIES) { + megasas_release_fusion(instance); +- pd_seq_map_sz = sizeof(struct MR_PD_CFG_SEQ_NUM_SYNC) + +- (sizeof(struct MR_PD_CFG_SEQ) * +- MAX_PHYSICAL_DEVICES); ++ pd_seq_map_sz = ++ struct_size((struct MR_PD_CFG_SEQ_NUM_SYNC *)0, ++ seq, MAX_PHYSICAL_DEVICES); + for (i = 0; i < 2 ; i++) { + if (fusion->ld_map[i]) + dma_free_coherent(&instance->pdev->dev, diff --git a/patches.suse/scsi-megaraid_sas-Use-struct_size-in-code-related-to.patch b/patches.suse/scsi-megaraid_sas-Use-struct_size-in-code-related-to.patch new file mode 100644 index 0000000..7cc6c4d --- /dev/null +++ b/patches.suse/scsi-megaraid_sas-Use-struct_size-in-code-related-to.patch @@ -0,0 +1,56 @@ +From: "Gustavo A. R. Silva" +Date: Mon, 15 Aug 2022 16:51:36 -0500 +Subject: scsi: megaraid_sas: Use struct_size() in code related to struct + MR_FW_RAID_MAP +Patch-mainline: v6.1-rc1 +Git-commit: 41e830269d68a07b3e9214449b9ff0be7a3cfda5 +References: jsc#PED-1490 + +Prefer struct_size() over open-coded versions of idiom: + + sizeof(struct-with-flex-array) + sizeof(type-of-flex-array) * count + +where count is the max number of items the flexible array is supposed to +have. + +Link: https://github.com/KSPP/linux/issues/160 +Link: https://lore.kernel.org/r/1211398fb8f7ab332a93f4f8f1a63e8168dbd002.1660592640.git.gustavoars@kernel.org +Reviewed-by: Kees Cook +Signed-off-by: Gustavo A. R. Silva +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas_base.c | 6 +++--- + drivers/scsi/megaraid/megaraid_sas_fp.c | 6 +++--- + 2 files changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -5155,9 +5155,9 @@ static void megasas_update_ext_vd_detail + fusion->current_map_sz = ventura_map_sz; + fusion->max_map_sz = ventura_map_sz; + } else { +- fusion->old_map_sz = sizeof(struct MR_FW_RAID_MAP) + +- (sizeof(struct MR_LD_SPAN_MAP) * +- instance->fw_supported_vd_count); ++ fusion->old_map_sz = ++ struct_size((struct MR_FW_RAID_MAP *)0, ldSpanMap, ++ instance->fw_supported_vd_count); + fusion->new_map_sz = sizeof(struct MR_FW_RAID_MAP_EXT); + + fusion->max_map_sz = +--- a/drivers/scsi/megaraid/megaraid_sas_fp.c ++++ b/drivers/scsi/megaraid/megaraid_sas_fp.c +@@ -326,9 +326,9 @@ u8 MR_ValidateMapInfo(struct megasas_ins + else if (instance->supportmax256vd) + expected_size = sizeof(struct MR_FW_RAID_MAP_EXT); + else +- expected_size = +- (sizeof(struct MR_FW_RAID_MAP) + +- (sizeof(struct MR_LD_SPAN_MAP) * le16_to_cpu(pDrvRaidMap->ldCount))); ++ expected_size = struct_size((struct MR_FW_RAID_MAP *)0, ++ ldSpanMap, ++ le16_to_cpu(pDrvRaidMap->ldCount)); + + if (le32_to_cpu(pDrvRaidMap->totalSize) != expected_size) { + dev_dbg(&instance->pdev->dev, "megasas: map info structure size 0x%x", diff --git a/patches.suse/scsi-megasas-Clean-up-some-inconsistent-indenting.patch b/patches.suse/scsi-megasas-Clean-up-some-inconsistent-indenting.patch new file mode 100644 index 0000000..8048a5f --- /dev/null +++ b/patches.suse/scsi-megasas-Clean-up-some-inconsistent-indenting.patch @@ -0,0 +1,33 @@ +From: Yang Li +Date: Fri, 25 Feb 2022 09:16:05 +0800 +Subject: scsi: megasas: Clean up some inconsistent indenting +Patch-mainline: v5.18-rc1 +Git-commit: 7db304bd2a4fbf98ed763cd7f599598f5d0e2477 +References: jsc#PED-1490 + +Eliminate the following smatch warning: +drivers/scsi/megaraid/megaraid_sas_fusion.c:5104 megasas_reset_fusion() +warn: inconsistent indenting + +Link: https://lore.kernel.org/r/20220225011605.130927-1-yang.lee@linux.alibaba.com +Reported-by: Abaci Robot +Signed-off-by: Yang Li +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas_fusion.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c ++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c +@@ -5100,8 +5100,8 @@ int megasas_reset_fusion(struct Scsi_Hos + if (instance->adapter_type >= VENTURA_SERIES) { + for (j = 0; j < MAX_LOGICAL_DRIVES_EXT; ++j) { + memset(fusion->stream_detect_by_ld[j], +- 0, sizeof(struct LD_STREAM_DETECT)); +- fusion->stream_detect_by_ld[j]->mru_bit_map ++ 0, sizeof(struct LD_STREAM_DETECT)); ++ fusion->stream_detect_by_ld[j]->mru_bit_map + = MR_STREAM_BITMAP; + } + } diff --git a/patches.suse/scsi-megasas-Stop-using-the-SCSI-pointer.patch b/patches.suse/scsi-megasas-Stop-using-the-SCSI-pointer.patch new file mode 100644 index 0000000..122f7dc --- /dev/null +++ b/patches.suse/scsi-megasas-Stop-using-the-SCSI-pointer.patch @@ -0,0 +1,156 @@ +From: Bart Van Assche +Date: Fri, 18 Feb 2022 11:51:01 -0800 +Subject: scsi: megasas: Stop using the SCSI pointer +Patch-mainline: v5.18-rc1 +Git-commit: 96e77a27431ac9ce2c76d6f946e2a5c03e19ca4b +References: jsc#PED-1490 + +Set .cmd_size in the SCSI host template instead of using the SCSI pointer +from struct scsi_cmnd. 
This patch prepares for removal of the SCSI pointer +from struct scsi_cmnd. + +Link: https://lore.kernel.org/r/20220218195117.25689-34-bvanassche@acm.org +Reviewed-by: Johannes Thumshirn +Reviewed-by: Hannes Reinecke +Reviewed-by: Himanshu Madhani +Signed-off-by: Bart Van Assche +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/megaraid/megaraid_sas.h | 12 ++++++++++++ + drivers/scsi/megaraid/megaraid_sas_base.c | 8 ++++---- + drivers/scsi/megaraid/megaraid_sas_fusion.c | 15 ++++++++------- + 3 files changed, 24 insertions(+), 11 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas.h ++++ b/drivers/scsi/megaraid/megaraid_sas.h +@@ -18,6 +18,8 @@ + #ifndef LSI_MEGARAID_SAS_H + #define LSI_MEGARAID_SAS_H + ++#include <scsi/scsi_cmnd.h> + + /* + * MegaRAID SAS Driver meta data + */ +@@ -2594,6 +2596,16 @@ struct megasas_cmd { + }; + }; + ++struct megasas_cmd_priv { ++ void *cmd_priv; ++ u8 status; ++}; ++ ++static inline struct megasas_cmd_priv *megasas_priv(struct scsi_cmnd *cmd) ++{ ++ return scsi_cmd_priv(cmd); ++} ++ + #define MAX_MGMT_ADAPTERS 1024 + #define MAX_IOCTL_SGE 16 + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -1760,7 +1760,7 @@ megasas_build_and_issue_cmd(struct megas + goto out_return_cmd; + + cmd->scmd = scmd; +- scmd->SCp.ptr = (char *)cmd; ++ megasas_priv(scmd)->cmd_priv = cmd; + + /* + * Issue the command to the FW +@@ -2992,11 +2992,10 @@ megasas_dump_reg_set(void __iomem *reg_s + void + megasas_dump_fusion_io(struct scsi_cmnd *scmd) + { +- struct megasas_cmd_fusion *cmd; ++ struct megasas_cmd_fusion *cmd = megasas_priv(scmd)->cmd_priv; + union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc; + struct megasas_instance *instance; + +- cmd = (struct megasas_cmd_fusion *)scmd->SCp.ptr; + instance = (struct megasas_instance *)scmd->device->host->hostdata; + + scmd_printk(KERN_INFO, scmd, +@@ -3518,6 +3517,7 @@ static struct scsi_host_template megasas + .mq_poll = megasas_blk_mq_poll, + .change_queue_depth = scsi_change_queue_depth, + .max_segment_size = 0xffffffff, ++ .cmd_size = sizeof(struct megasas_cmd_priv), + }; + + /** +@@ -3601,7 +3601,7 @@ megasas_complete_cmd(struct megasas_inst + cmd->retry_for_fw_reset = 0; + + if (cmd->scmd) +- cmd->scmd->SCp.ptr = NULL; ++ megasas_priv(cmd->scmd)->cmd_priv = NULL; + + switch (hdr->cmd) { + case MFI_CMD_INVALID: +--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c ++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c +@@ -2915,7 +2915,7 @@ megasas_build_ldio_fusion(struct megasas + get_updated_dev_handle(instance, + &fusion->load_balance_info[device_id], + &io_info, local_map_ptr); +- scp->SCp.Status |= MEGASAS_LOAD_BALANCE_FLAG; ++ megasas_priv(scp)->status |= MEGASAS_LOAD_BALANCE_FLAG; + cmd->pd_r1_lb = io_info.pd_after_lb; + if (instance->adapter_type >= VENTURA_SERIES) + rctx_g35->span_arm = io_info.span_arm; +@@ -2923,7 +2923,7 @@ megasas_build_ldio_fusion(struct megasas + rctx->span_arm = io_info.span_arm; + + } else +- scp->SCp.Status &= ~MEGASAS_LOAD_BALANCE_FLAG; ++ megasas_priv(scp)->status &= ~MEGASAS_LOAD_BALANCE_FLAG; + + if (instance->adapter_type >= VENTURA_SERIES) + cmd->r1_alt_dev_handle = io_info.r1_alt_dev_handle; +@@ -3293,7 +3293,7 @@ megasas_build_io_fusion(struct megasas_i + io_request->SenseBufferLength = SCSI_SENSE_BUFFERSIZE; + + cmd->scmd = scp; +- scp->SCp.ptr = (char *)cmd; ++ megasas_priv(scp)->cmd_priv = cmd; + + return 0; + } +@@ -3489,7 +3489,7 @@ megasas_complete_r1_command(struct megas + if (instance->ldio_threshold &&
megasas_cmd_type(scmd_local) == READ_WRITE_LDIO) + atomic_dec(&instance->ldio_outstanding); +- scmd_local->SCp.ptr = NULL; ++ megasas_priv(scmd_local)->cmd_priv = NULL; + megasas_return_cmd_fusion(instance, cmd); + scsi_dma_unmap(scmd_local); + megasas_sdev_busy_dec(instance, scmd_local); +@@ -3613,12 +3613,13 @@ complete_cmd_fusion(struct megasas_insta + case MPI2_FUNCTION_SCSI_IO_REQUEST: /*Fast Path IO.*/ + /* Update load balancing info */ + if (fusion->load_balance_info && +- (cmd_fusion->scmd->SCp.Status & ++ (megasas_priv(cmd_fusion->scmd)->status & + MEGASAS_LOAD_BALANCE_FLAG)) { + device_id = MEGASAS_DEV_INDEX(scmd_local); + lbinfo = &fusion->load_balance_info[device_id]; + atomic_dec(&lbinfo->scsi_pending_cmds[cmd_fusion->pd_r1_lb]); +- cmd_fusion->scmd->SCp.Status &= ~MEGASAS_LOAD_BALANCE_FLAG; ++ megasas_priv(cmd_fusion->scmd)->status &= ++ ~MEGASAS_LOAD_BALANCE_FLAG; + } + fallthrough; /* and complete IO */ + case MEGASAS_MPI2_FUNCTION_LD_IO_REQUEST: /* LD-IO Path */ +@@ -3630,7 +3631,7 @@ complete_cmd_fusion(struct megasas_insta + if (instance->ldio_threshold && + (megasas_cmd_type(scmd_local) == READ_WRITE_LDIO)) + atomic_dec(&instance->ldio_outstanding); +- scmd_local->SCp.ptr = NULL; ++ megasas_priv(scmd_local)->cmd_priv = NULL; + megasas_return_cmd_fusion(instance, cmd_fusion); + scsi_dma_unmap(scmd_local); + megasas_sdev_busy_dec(instance, scmd_local); diff --git a/patches.suse/scsi-mpt3sas-Add-support-for-ATTO-ExpressSAS-H12xx-G.patch b/patches.suse/scsi-mpt3sas-Add-support-for-ATTO-ExpressSAS-H12xx-G.patch new file mode 100644 index 0000000..de73694 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Add-support-for-ATTO-ExpressSAS-H12xx-G.patch @@ -0,0 +1,447 @@ +From: Bradley Grove +Date: Fri, 5 Aug 2022 13:46:08 -0400 +Subject: scsi: mpt3sas: Add support for ATTO ExpressSAS H12xx GT devices +Patch-mainline: v6.1-rc1 +Git-commit: 91cf186aa1bfea06c7438b16eb40a612a4c3b87a +References: jsc#PED_1491 + +Add ATTO's PCI IDs and modify the driver to handle the unique NVRAM +structure used by ATTO's devices. + +Link: https://lore.kernel.org/r/20220805174609.14830-1-bgrove@attotech.com +Co-developed-by: Rob Crispo +Signed-off-by: Rob Crispo +Signed-off-by: Bradley Grove +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h | 1 + drivers/scsi/mpt3sas/mpt3sas_base.c | 173 ++++++++++++++++++++++++++++++++-- + drivers/scsi/mpt3sas/mpt3sas_base.h | 35 ++++++ + drivers/scsi/mpt3sas/mpt3sas_config.c | 124 ++++++++++++++++++++++++ + drivers/scsi/mpt3sas/mpt3sas_scsih.c | 6 + + 5 files changed, 333 insertions(+), 6 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h ++++ b/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h +@@ -534,6 +534,7 @@ typedef struct _MPI2_CONFIG_REPLY { + ****************************************************************************/ + + #define MPI2_MFGPAGE_VENDORID_LSI (0x1000) ++#define MPI2_MFGPAGE_VENDORID_ATTO (0x117C) + + /*MPI v2.0 SAS products */ + #define MPI2_MFGPAGE_DEVID_SAS2004 (0x0070) +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -5425,6 +5425,151 @@ out: + } + + /** ++ * mpt3sas_atto_validate_nvram - validate the ATTO nvram read from mfg pg1 ++ * ++ * @ioc : per adapter object ++ * @n : ptr to the ATTO nvram structure ++ * Return: 0 for success, non-zero for failure. 
++ */ ++static int ++mpt3sas_atto_validate_nvram(struct MPT3SAS_ADAPTER *ioc, ++ struct ATTO_SAS_NVRAM *n) ++{ ++ int r = -EINVAL; ++ union ATTO_SAS_ADDRESS *s1; ++ u32 len; ++ u8 *pb; ++ u8 ckSum; ++ ++ /* validate nvram checksum */ ++ pb = (u8 *) n; ++ ckSum = ATTO_SASNVR_CKSUM_SEED; ++ len = sizeof(struct ATTO_SAS_NVRAM); ++ ++ while (len--) ++ ckSum = ckSum + pb[len]; ++ ++ if (ckSum) { ++ ioc_err(ioc, "Invalid ATTO NVRAM checksum\n"); ++ return r; ++ } ++ ++ s1 = (union ATTO_SAS_ADDRESS *) n->SasAddr; ++ ++ if (n->Signature[0] != 'E' ++ || n->Signature[1] != 'S' ++ || n->Signature[2] != 'A' ++ || n->Signature[3] != 'S') ++ ioc_err(ioc, "Invalid ATTO NVRAM signature\n"); ++ else if (n->Version > ATTO_SASNVR_VERSION) ++ ioc_info(ioc, "Invalid ATTO NVRAM version"); ++ else if ((n->SasAddr[7] & (ATTO_SAS_ADDR_ALIGN - 1)) ++ || s1->b[0] != 0x50 ++ || s1->b[1] != 0x01 ++ || s1->b[2] != 0x08 ++ || (s1->b[3] & 0xF0) != 0x60 ++ || ((s1->b[3] & 0x0F) | le32_to_cpu(s1->d[1])) == 0) { ++ ioc_err(ioc, "Invalid ATTO SAS address\n"); ++ } else ++ r = 0; ++ return r; ++} ++ ++/** ++ * mpt3sas_atto_get_sas_addr - get the ATTO SAS address from mfg page 1 ++ * ++ * @ioc : per adapter object ++ * @*sas_addr : return sas address ++ * Return: 0 for success, non-zero for failure. ++ */ ++static int ++mpt3sas_atto_get_sas_addr(struct MPT3SAS_ADAPTER *ioc, union ATTO_SAS_ADDRESS *sas_addr) ++{ ++ Mpi2ManufacturingPage1_t mfg_pg1; ++ Mpi2ConfigReply_t mpi_reply; ++ struct ATTO_SAS_NVRAM *nvram; ++ int r; ++ __be64 addr; ++ ++ r = mpt3sas_config_get_manufacturing_pg1(ioc, &mpi_reply, &mfg_pg1); ++ if (r) { ++ ioc_err(ioc, "Failed to read manufacturing page 1\n"); ++ return r; ++ } ++ ++ /* validate nvram */ ++ nvram = (struct ATTO_SAS_NVRAM *) mfg_pg1.VPD; ++ r = mpt3sas_atto_validate_nvram(ioc, nvram); ++ if (r) ++ return r; ++ ++ addr = *((__be64 *) nvram->SasAddr); ++ sas_addr->q = cpu_to_le64(be64_to_cpu(addr)); ++ return r; ++} ++ ++/** ++ * mpt3sas_atto_init - perform initialization for ATTO branded ++ * adapter. ++ * @ioc : per adapter object ++ * ++ ++ * Return: 0 for success, non-zero for failure.
++ */ ++static int ++mpt3sas_atto_init(struct MPT3SAS_ADAPTER *ioc) ++{ ++ int sz = 0; ++ Mpi2BiosPage4_t *bios_pg4 = NULL; ++ Mpi2ConfigReply_t mpi_reply; ++ int r; ++ int ix; ++ union ATTO_SAS_ADDRESS sas_addr; ++ union ATTO_SAS_ADDRESS temp; ++ union ATTO_SAS_ADDRESS bias; ++ ++ r = mpt3sas_atto_get_sas_addr(ioc, &sas_addr); ++ if (r) ++ return r; ++ ++ /* get header first to get size */ ++ r = mpt3sas_config_get_bios_pg4(ioc, &mpi_reply, NULL, 0); ++ if (r) { ++ ioc_err(ioc, "Failed to read ATTO bios page 4 header.\n"); ++ return r; ++ } ++ ++ sz = mpi_reply.Header.PageLength * sizeof(u32); ++ bios_pg4 = kzalloc(sz, GFP_KERNEL); ++ if (!bios_pg4) { ++ ioc_err(ioc, "Failed to allocate memory for ATTO bios page.\n"); ++ return -ENOMEM; ++ } ++ ++ /* read bios page 4 */ ++ r = mpt3sas_config_get_bios_pg4(ioc, &mpi_reply, bios_pg4, sz); ++ if (r) { ++ ioc_err(ioc, "Failed to read ATTO bios page 4\n"); ++ goto out; ++ } ++ ++ /* Update bios page 4 with the ATTO WWID */ ++ bias.q = sas_addr.q; ++ bias.b[7] += ATTO_SAS_ADDR_DEVNAME_BIAS; ++ ++ for (ix = 0; ix < bios_pg4->NumPhys; ix++) { ++ temp.q = sas_addr.q; ++ temp.b[7] += ix; ++ bios_pg4->Phy[ix].ReassignmentWWID = temp.q; ++ bios_pg4->Phy[ix].ReassignmentDeviceName = bias.q; ++ } ++ r = mpt3sas_config_set_bios_pg4(ioc, &mpi_reply, bios_pg4, sz); ++ ++out: ++ kfree(bios_pg4); ++ return r; ++} ++ ++/** + * _base_static_config_pages - static start of day config pages + * @ioc: per adapter object + */ +@@ -5447,6 +5592,13 @@ _base_static_config_pages(struct MPT3SAS + if (rc) + return rc; + } ++ ++ if (ioc->pdev->vendor == MPI2_MFGPAGE_VENDORID_ATTO) { ++ rc = mpt3sas_atto_init(ioc); ++ if (rc) ++ return rc; ++ } ++ + /* + * Ensure correct T10 PI operation if vendor left EEDPTagMode + * flag unset in NVDATA. +@@ -5496,12 +5648,21 @@ _base_static_config_pages(struct MPT3SAS + rc = _base_assign_fw_reported_qd(ioc); + if (rc) + return rc; +- rc = mpt3sas_config_get_bios_pg2(ioc, &mpi_reply, &ioc->bios_pg2); +- if (rc) +- return rc; +- rc = mpt3sas_config_get_bios_pg3(ioc, &mpi_reply, &ioc->bios_pg3); +- if (rc) +- return rc; ++ ++ /* ++ * ATTO doesn't use bios page 2 and 3 for bios settings. ++ */ ++ if (ioc->pdev->vendor == MPI2_MFGPAGE_VENDORID_ATTO) ++ ioc->bios_pg3.BiosVersion = 0; ++ else { ++ rc = mpt3sas_config_get_bios_pg2(ioc, &mpi_reply, &ioc->bios_pg2); ++ if (rc) ++ return rc; ++ rc = mpt3sas_config_get_bios_pg3(ioc, &mpi_reply, &ioc->bios_pg3); ++ if (rc) ++ return rc; ++ } ++ + rc = mpt3sas_config_get_ioc_pg8(ioc, &mpi_reply, &ioc->ioc_pg8); + if (rc) + return rc; +--- a/drivers/scsi/mpt3sas/mpt3sas_base.h ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.h +@@ -1652,6 +1652,32 @@ struct mpt3sas_debugfs_buffer { + typedef u8 (*MPT_CALLBACK)(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, + u32 reply); + ++/* ++ * struct ATTO_SAS_NVRAM - ATTO NVRAM settings stored ++ * in Manufacturing page 1 used to get ++ * ATTO SasAddr. 
++ */ ++struct ATTO_SAS_NVRAM { ++ u8 Signature[4]; ++ u8 Version; ++#define ATTO_SASNVR_VERSION 0 ++ ++ u8 Checksum; ++#define ATTO_SASNVR_CKSUM_SEED 0x5A ++ u8 Pad[10]; ++ u8 SasAddr[8]; ++#define ATTO_SAS_ADDR_ALIGN 64 ++ u8 Reserved[232]; ++}; ++ ++#define ATTO_SAS_ADDR_DEVNAME_BIAS 63 ++ ++union ATTO_SAS_ADDRESS { ++ U8 b[8]; ++ U16 w[4]; ++ U32 d[2]; ++ U64 q; ++}; + + /* base shared API */ + extern struct list_head mpt3sas_ioc_list; +@@ -1828,6 +1854,9 @@ int mpt3sas_config_get_number_hba_phys(s + u8 *num_phys); + int mpt3sas_config_get_manufacturing_pg0(struct MPT3SAS_ADAPTER *ioc, + Mpi2ConfigReply_t *mpi_reply, Mpi2ManufacturingPage0_t *config_page); ++int mpt3sas_config_get_manufacturing_pg1(struct MPT3SAS_ADAPTER *ioc, ++ Mpi2ConfigReply_t *mpi_reply, Mpi2ManufacturingPage1_t *config_page); ++ + int mpt3sas_config_get_manufacturing_pg7(struct MPT3SAS_ADAPTER *ioc, + Mpi2ConfigReply_t *mpi_reply, Mpi2ManufacturingPage7_t *config_page, + u16 sz); +@@ -1846,6 +1875,12 @@ int mpt3sas_config_get_bios_pg2(struct M + *mpi_reply, Mpi2BiosPage2_t *config_page); + int mpt3sas_config_get_bios_pg3(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigReply_t + *mpi_reply, Mpi2BiosPage3_t *config_page); ++int mpt3sas_config_set_bios_pg4(struct MPT3SAS_ADAPTER *ioc, ++ Mpi2ConfigReply_t *mpi_reply, Mpi2BiosPage4_t *config_page, ++ int sz_config_page); ++int mpt3sas_config_get_bios_pg4(struct MPT3SAS_ADAPTER *ioc, ++ Mpi2ConfigReply_t *mpi_reply, Mpi2BiosPage4_t *config_page, ++ int sz_config_page); + int mpt3sas_config_get_iounit_pg0(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigReply_t + *mpi_reply, Mpi2IOUnitPage0_t *config_page); + int mpt3sas_config_get_sas_device_pg0(struct MPT3SAS_ADAPTER *ioc, +--- a/drivers/scsi/mpt3sas/mpt3sas_config.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_config.c +@@ -541,6 +541,42 @@ mpt3sas_config_get_manufacturing_pg0(str + } + + /** ++ * mpt3sas_config_get_manufacturing_pg1 - obtain manufacturing page 1 ++ * @ioc: per adapter object ++ * @mpi_reply: reply mf payload returned from firmware ++ * @config_page: contents of the config page ++ * Context: sleep. ++ * ++ * Return: 0 for success, non-zero for failure. 
++ */ ++int ++mpt3sas_config_get_manufacturing_pg1(struct MPT3SAS_ADAPTER *ioc, ++ Mpi2ConfigReply_t *mpi_reply, Mpi2ManufacturingPage1_t *config_page) ++{ ++ Mpi2ConfigRequest_t mpi_request; ++ int r; ++ ++ memset(&mpi_request, 0, sizeof(Mpi2ConfigRequest_t)); ++ mpi_request.Function = MPI2_FUNCTION_CONFIG; ++ mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_HEADER; ++ mpi_request.Header.PageType = MPI2_CONFIG_PAGETYPE_MANUFACTURING; ++ mpi_request.Header.PageNumber = 1; ++ mpi_request.Header.PageVersion = MPI2_MANUFACTURING1_PAGEVERSION; ++ ioc->build_zero_len_sge_mpi(ioc, &mpi_request.PageBufferSGE); ++ r = _config_request(ioc, &mpi_request, mpi_reply, ++ MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, NULL, 0); ++ if (r) ++ goto out; ++ ++ mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_READ_CURRENT; ++ r = _config_request(ioc, &mpi_request, mpi_reply, ++ MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, config_page, ++ sizeof(*config_page)); ++ out: ++ return r; ++} ++ ++/** + * mpt3sas_config_get_manufacturing_pg7 - obtain manufacturing page 7 + * @ioc: per adapter object + * @mpi_reply: reply mf payload returned from firmware +@@ -757,10 +793,98 @@ mpt3sas_config_get_bios_pg3(struct MPT3S + r = _config_request(ioc, &mpi_request, mpi_reply, + MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, config_page, + sizeof(*config_page)); ++ ++ out: ++ return r; ++} ++ ++/** ++ * mpt3sas_config_set_bios_pg4 - write out bios page 4 ++ * @ioc: per adapter object ++ * @mpi_reply: reply mf payload returned from firmware ++ * @config_page: contents of the config page ++ * @sz_config_pg: sizeof the config page ++ * Context: sleep. ++ * ++ * Return: 0 for success, non-zero for failure. ++ */ ++int ++mpt3sas_config_set_bios_pg4(struct MPT3SAS_ADAPTER *ioc, ++ Mpi2ConfigReply_t *mpi_reply, Mpi2BiosPage4_t *config_page, ++ int sz_config_pg) ++{ ++ Mpi2ConfigRequest_t mpi_request; ++ int r; ++ ++ memset(&mpi_request, 0, sizeof(Mpi2ConfigRequest_t)); ++ ++ mpi_request.Function = MPI2_FUNCTION_CONFIG; ++ mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_HEADER; ++ mpi_request.Header.PageType = MPI2_CONFIG_PAGETYPE_BIOS; ++ mpi_request.Header.PageNumber = 4; ++ mpi_request.Header.PageVersion = MPI2_BIOSPAGE4_PAGEVERSION; ++ ++ ioc->build_zero_len_sge_mpi(ioc, &mpi_request.PageBufferSGE); ++ ++ r = _config_request(ioc, &mpi_request, mpi_reply, ++ MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, NULL, 0); ++ if (r) ++ goto out; ++ ++ mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_WRITE_CURRENT; ++ r = _config_request(ioc, &mpi_request, mpi_reply, ++ MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, config_page, ++ sz_config_pg); + out: + return r; + } + ++/** ++ * mpt3sas_config_get_bios_pg4 - read bios page 4 ++ * @ioc: per adapter object ++ * @mpi_reply: reply mf payload returned from firmware ++ * @config_page: contents of the config page ++ * @sz_config_pg: sizeof the config page ++ * Context: sleep. ++ * ++ * Return: 0 for success, non-zero for failure. 
++ */ ++int ++mpt3sas_config_get_bios_pg4(struct MPT3SAS_ADAPTER *ioc, ++ Mpi2ConfigReply_t *mpi_reply, Mpi2BiosPage4_t *config_page, ++ int sz_config_pg) ++{ ++ Mpi2ConfigRequest_t mpi_request; ++ int r; ++ ++ memset(&mpi_request, 0, sizeof(Mpi2ConfigRequest_t)); ++ mpi_request.Function = MPI2_FUNCTION_CONFIG; ++ mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_HEADER; ++ mpi_request.Header.PageType = MPI2_CONFIG_PAGETYPE_BIOS; ++ mpi_request.Header.PageNumber = 4; ++ mpi_request.Header.PageVersion = MPI2_BIOSPAGE4_PAGEVERSION; ++ ioc->build_zero_len_sge_mpi(ioc, &mpi_request.PageBufferSGE); ++ r = _config_request(ioc, &mpi_request, mpi_reply, ++ MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, NULL, 0); ++ if (r) ++ goto out; ++ ++ /* ++ * The sizeof the page is variable. Allow for just the ++ * size to be returned ++ */ ++ if (config_page && sz_config_pg) { ++ mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_READ_CURRENT; ++ ++ r = _config_request(ioc, &mpi_request, mpi_reply, ++ MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, config_page, ++ sz_config_pg); ++ } ++ ++out: ++ return r; ++} ++ + /** + * mpt3sas_config_get_iounit_pg0 - obtain iounit page 0 + * @ioc: per adapter object +--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c +@@ -12733,6 +12733,12 @@ static const struct pci_device_id mpt3sa + PCI_ANY_ID, PCI_ANY_ID }, + + /* ++ * ATTO Branded ExpressSAS H12xx GT ++ */ ++ { MPI2_MFGPAGE_VENDORID_ATTO, MPI26_MFGPAGE_DEVID_HARD_SEC_3816, ++ PCI_ANY_ID, PCI_ANY_ID }, ++ ++ /* + * Sea SI –> 0x00E4 Invalid, 0x00E7 Tampered + */ + { MPI2_MFGPAGE_VENDORID_LSI, MPI26_MFGPAGE_DEVID_INVALID0_3816, diff --git a/patches.suse/scsi-mpt3sas-Convert-to-flexible-arrays.patch b/patches.suse/scsi-mpt3sas-Convert-to-flexible-arrays.patch new file mode 100644 index 0000000..8362364 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Convert-to-flexible-arrays.patch @@ -0,0 +1,52 @@ +From: Kees Cook +Date: Tue, 1 Feb 2022 14:39:48 -0800 +Subject: scsi: mpt3sas: Convert to flexible arrays +Patch-mainline: v5.18-rc1 +Git-commit: d20b3dae630f6718a72f7ab68c3b8c8e897bf09f +References: jsc#PED_1491 + +This converts to a flexible array instead of the old-style 1-element +arrays. The existing code already did the correct math for finding the size +of the resulting flexible array structure, so there is no binary +difference. + +The other two structures converted to use flexible arrays appear to have no +users at all. + +Link: https://lore.kernel.org/r/20220201223948.1455637-1-keescook@chromium.org +Signed-off-by: Kees Cook +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpi/mpi2_ioc.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h ++++ b/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h +@@ -537,7 +537,7 @@ typedef struct _MPI2_EVENT_NOTIFICATION_ + U16 Event; /*0x14 */ + U16 Reserved4; /*0x16 */ + U32 EventContext; /*0x18 */ +- U32 EventData[1]; /*0x1C */ ++ U32 EventData[]; /*0x1C */ + } MPI2_EVENT_NOTIFICATION_REPLY, *PTR_MPI2_EVENT_NOTIFICATION_REPLY, + Mpi2EventNotificationReply_t, + *pMpi2EventNotificationReply_t; +@@ -639,7 +639,7 @@ typedef struct _MPI2_EVENT_DATA_HOST_MES + U8 Reserved1; /*0x01 */ + U16 Reserved2; /*0x02 */ + U32 Reserved3; /*0x04 */ +- U32 HostData[1]; /*0x08 */ ++ U32 HostData[]; /*0x08 */ + } MPI2_EVENT_DATA_HOST_MESSAGE, *PTR_MPI2_EVENT_DATA_HOST_MESSAGE, + Mpi2EventDataHostMessage_t, *pMpi2EventDataHostMessage_t; + +@@ -1397,7 +1397,7 @@ typedef struct _MPI2_SEND_HOST_MESSAGE_R + U32 Reserved8; /*0x18 */ + U32 Reserved9; /*0x1C */ + U32 Reserved10; /*0x20 */ +- U32 HostData[1]; /*0x24 */ ++ U32 HostData[]; /*0x24 */ + } MPI2_SEND_HOST_MESSAGE_REQUEST, + *PTR_MPI2_SEND_HOST_MESSAGE_REQUEST, + Mpi2SendHostMessageRequest_t, diff --git a/patches.suse/scsi-mpt3sas-Disable-MPI2_FUNCTION_FW_DOWNLOAD-for-A.patch b/patches.suse/scsi-mpt3sas-Disable-MPI2_FUNCTION_FW_DOWNLOAD-for-A.patch new file mode 100644 index 0000000..54618b6 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Disable-MPI2_FUNCTION_FW_DOWNLOAD-for-A.patch @@ -0,0 +1,36 @@ +From: Bradley Grove +Date: Fri, 5 Aug 2022 13:46:09 -0400 +Subject: scsi: mpt3sas: Disable MPI2_FUNCTION_FW_DOWNLOAD for ATTO devices +Patch-mainline: v6.1-rc1 +Git-commit: f45fadde91ec892fdc453e3df9469e9457152526 +References: jsc#PED_1491 + +Disable firmware download for ATTO devices where it is not supported. + +Link: https://lore.kernel.org/r/20220805174609.14830-2-bgrove@attotech.com +Co-developed-by: Rob Crispo +Signed-off-by: Rob Crispo +Signed-off-by: Bradley Grove +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_ctl.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c +@@ -948,6 +948,14 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPT + break; + } + case MPI2_FUNCTION_FW_DOWNLOAD: ++ { ++ if (ioc->pdev->vendor == MPI2_MFGPAGE_VENDORID_ATTO) { ++ ioc_info(ioc, "Firmware download not supported for ATTO HBA.\n"); ++ ret = -EPERM; ++ break; ++ } ++ fallthrough; ++ } + case MPI2_FUNCTION_FW_UPLOAD: + { + ioc->build_sg(ioc, psge, data_out_dma, data_out_sz, data_in_dma, diff --git a/patches.suse/scsi-mpt3sas-Don-t-change-DMA-mask-while-reallocatin.patch b/patches.suse/scsi-mpt3sas-Don-t-change-DMA-mask-while-reallocatin.patch new file mode 100644 index 0000000..29b2a43 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Don-t-change-DMA-mask-while-reallocatin.patch @@ -0,0 +1,55 @@ +From: Sreekanth Reddy +Date: Thu, 25 Aug 2022 13:24:54 +0530 +Subject: scsi: mpt3sas: Don't change DMA mask while reallocating pools +Patch-mainline: v6.1-rc1 +Git-commit: 9df650963bf6d6c2c3fcd325d8c44ca2b99554fe +References: jsc#PED_1491 + +When a pool crosses the 4GB boundary region, change the coherent DMA mask +to 32 bits before reallocating the pools, and keep the normal DMA mask set +to 63/64 bits. + +Link: https://lore.kernel.org/r/20220825075457.16422-2-sreekanth.reddy@broadcom.com +Signed-off-by: Sreekanth Reddy +Signed-off-by: Martin K.
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -2990,19 +2990,26 @@ static int + _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) + { + struct sysinfo s; ++ u64 coherent_dma_mask, dma_mask; + +- if (ioc->is_mcpu_endpoint || +- sizeof(dma_addr_t) == 4 || ioc->use_32bit_dma || +- dma_get_required_mask(&pdev->dev) <= DMA_BIT_MASK(32)) ++ if (ioc->is_mcpu_endpoint || sizeof(dma_addr_t) == 4 || ++ dma_get_required_mask(&pdev->dev) <= 32) { + ioc->dma_mask = 32; ++ coherent_dma_mask = dma_mask = DMA_BIT_MASK(32); + /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */ +- else if (ioc->hba_mpi_version_belonged > MPI2_VERSION) ++ } else if (ioc->hba_mpi_version_belonged > MPI2_VERSION) { + ioc->dma_mask = 63; +- else ++ coherent_dma_mask = dma_mask = DMA_BIT_MASK(63); ++ } else { + ioc->dma_mask = 64; ++ coherent_dma_mask = dma_mask = DMA_BIT_MASK(64); ++ } + +- if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(ioc->dma_mask)) || +- dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(ioc->dma_mask))) ++ if (ioc->use_32bit_dma) ++ coherent_dma_mask = DMA_BIT_MASK(32); ++ ++ if (dma_set_mask(&pdev->dev, dma_mask) || ++ dma_set_coherent_mask(&pdev->dev, coherent_dma_mask)) + return -ENODEV; + + if (ioc->dma_mask > 32) { diff --git a/patches.suse/scsi-mpt3sas-Fail-reset-operation-if-config-request-.patch b/patches.suse/scsi-mpt3sas-Fail-reset-operation-if-config-request-.patch new file mode 100644 index 0000000..6776194 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Fail-reset-operation-if-config-request-.patch @@ -0,0 +1,38 @@ +From: Sreekanth Reddy +Date: Tue, 5 Apr 2022 17:36:37 +0530 +Subject: scsi: mpt3sas: Fail reset operation if config request timed out +Patch-mainline: v5.18-rc2 +Git-commit: f61eb1216c959f93ffabd3b8781fa5b2b22f8907 +References: jsc#PED_1491 + +As part of controller reset operation the driver issues a config request +command. If this command times out, then fail the controller reset +operation instead of retrying it. + +Link: https://lore.kernel.org/r/20220405120637.20528-1-sreekanth.reddy@broadcom.com +Signed-off-by: Sreekanth Reddy +Signed-off-by: Martin K.
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_config.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_config.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_config.c +@@ -394,10 +394,13 @@ _config_request(struct MPT3SAS_ADAPTER * + retry_count++; + if (ioc->config_cmds.smid == smid) + mpt3sas_base_free_smid(ioc, smid); +- if ((ioc->shost_recovery) || (ioc->config_cmds.status & +- MPT3_CMD_RESET) || ioc->pci_error_recovery) ++ if (ioc->config_cmds.status & MPT3_CMD_RESET) + goto retry_config; +- issue_host_reset = 1; ++ if (ioc->shost_recovery || ioc->pci_error_recovery) { ++ issue_host_reset = 0; ++ r = -EFAULT; ++ } else ++ issue_host_reset = 1; + goto free_mem; + } + diff --git a/patches.suse/scsi-mpt3sas-Fix-_ctl_set_task_mid-TaskMID-check.patch b/patches.suse/scsi-mpt3sas-Fix-_ctl_set_task_mid-TaskMID-check.patch new file mode 100644 index 0000000..159b5d7 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Fix-_ctl_set_task_mid-TaskMID-check.patch @@ -0,0 +1,55 @@ +From: Damien Le Moal +Date: Tue, 8 Mar 2022 08:48:50 +0900 +Subject: scsi: mpt3sas: Fix _ctl_set_task_mid() TaskMID check +Patch-mainline: v5.19-rc1 +Git-commit: dceaef94a4753d4d49d493a6cd4a81168e384d6f +References: jsc#PED_1491 + +The TaskMID field of struct Mpi2SCSITaskManagementRequest_t is a 16-bit +little endian value. Fix the search loop in _ctl_set_task_mid() to add a +cpu_to_le16() conversion before checking the value of TaskMID to avoid +sparse warnings. While at it, simplify the search loop code to remove an +unnecessarily complicated if condition. + +Link: https://lore.kernel.org/r/20220307234854.148145-2-damien.lemoal@opensource.wdc.com +Signed-off-by: Damien Le Moal +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_ctl.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c +@@ -578,7 +578,7 @@ static int + _ctl_set_task_mid(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command *karg, + Mpi2SCSITaskManagementRequest_t *tm_request) + { +- u8 found = 0; ++ bool found = false; + u16 smid; + u16 handle; + struct scsi_cmnd *scmd; +@@ -600,6 +600,7 @@ _ctl_set_task_mid(struct MPT3SAS_ADAPTER + handle = le16_to_cpu(tm_request->DevHandle); + for (smid = ioc->scsiio_depth; smid && !found; smid--) { + struct scsiio_tracker *st; ++ __le16 task_mid; + + scmd = mpt3sas_scsih_scsi_lookup_get(ioc, smid); + if (!scmd) +@@ -618,10 +619,10 @@ _ctl_set_task_mid(struct MPT3SAS_ADAPTER + * first outstanding smid will be picked up. Otherwise, + * targeted smid will be the one. 
+ */ +- if (!tm_request->TaskMID || tm_request->TaskMID == st->smid) { +- tm_request->TaskMID = cpu_to_le16(st->smid); +- found = 1; +- } ++ task_mid = cpu_to_le16(st->smid); ++ if (!tm_request->TaskMID) ++ tm_request->TaskMID = task_mid; ++ found = tm_request->TaskMID == task_mid; + } + + if (!found) { diff --git a/patches.suse/scsi-mpt3sas-Fix-adapter-replyPostRegisterIndex-decl.patch b/patches.suse/scsi-mpt3sas-Fix-adapter-replyPostRegisterIndex-decl.patch new file mode 100644 index 0000000..2dc7d58 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Fix-adapter-replyPostRegisterIndex-decl.patch @@ -0,0 +1,49 @@ +From: Damien Le Moal +Date: Tue, 8 Mar 2022 08:48:54 +0900 +Subject: scsi: mpt3sas: Fix adapter replyPostRegisterIndex declaration +Patch-mainline: v5.19-rc1 +Git-commit: fe413ab32b240d30c2500c3ba8f3f0ccbcf59fe4 +References: jsc#PED_1491 + +The replyPostRegisterIndex array of struct MPT3SAS_ADAPTER stores iomem +resource addresses. Fix its declaration to annotate it with __iomem to +avoid sparse warnings for writel() calls using the stored addresses. + +Link: https://lore.kernel.org/r/20220307234854.148145-6-damien.lemoal@opensource.wdc.com +Signed-off-by: Damien Le Moal +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 9 +++++---- + drivers/scsi/mpt3sas/mpt3sas_base.h | 2 +- + 2 files changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -3692,10 +3692,11 @@ mpt3sas_base_map_resources(struct MPT3SA + } + + for (i = 0; i < ioc->combined_reply_index_count; i++) { +- ioc->replyPostRegisterIndex[i] = (resource_size_t *) +- ((u8 __force *)&ioc->chip->Doorbell + +- MPI25_SUP_REPLY_POST_HOST_INDEX_OFFSET + +- (i * MPT3_SUP_REPLY_POST_HOST_INDEX_REG_OFFSET)); ++ ioc->replyPostRegisterIndex[i] = ++ (resource_size_t __iomem *) ++ ((u8 __force *)&ioc->chip->Doorbell + ++ MPI25_SUP_REPLY_POST_HOST_INDEX_OFFSET + ++ (i * MPT3_SUP_REPLY_POST_HOST_INDEX_REG_OFFSET)); + } + } + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.h ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.h +@@ -1588,7 +1588,7 @@ struct MPT3SAS_ADAPTER { + u8 combined_reply_index_count; + u8 smp_affinity_enable; + /* reply post register index */ +- resource_size_t **replyPostRegisterIndex; ++ resource_size_t __iomem **replyPostRegisterIndex; + + struct list_head delayed_tr_list; + struct list_head delayed_tr_volume_list; diff --git a/patches.suse/scsi-mpt3sas-Fix-event-callback-log_code-value-handl.patch b/patches.suse/scsi-mpt3sas-Fix-event-callback-log_code-value-handl.patch new file mode 100644 index 0000000..73020c7 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Fix-event-callback-log_code-value-handl.patch @@ -0,0 +1,47 @@ +From: Damien Le Moal +Date: Tue, 8 Mar 2022 08:48:53 +0900 +Subject: scsi: mpt3sas: Fix event callback log_code value handling +Patch-mainline: v5.19-rc1 +Git-commit: 82b4420c288c45cc38a1be6c5b4e396c1ea4599f +References: jsc#PED_1491 + +In mpt3sas_scsih_event_callback(), fix a sparse warning when testing the +event log code value by replacing the use of a pointer to the address +storing the event log code with a log code local variable. Doing so, +le32_to_cpu() is used when the log code value is assigned, avoiding a +sparse warning. + +Link: https://lore.kernel.org/r/20220307234854.148145-5-damien.lemoal@opensource.wdc.com +Signed-off-by: Damien Le Moal +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_scsih.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c +@@ -10926,20 +10926,20 @@ mpt3sas_scsih_event_callback(struct MPT3 + case MPI2_EVENT_LOG_ENTRY_ADDED: + { + Mpi2EventDataLogEntryAdded_t *log_entry; +- u32 *log_code; ++ u32 log_code; + + if (!ioc->is_warpdrive) + break; + + log_entry = (Mpi2EventDataLogEntryAdded_t *) + mpi_reply->EventData; +- log_code = (u32 *)log_entry->LogData; ++ log_code = le32_to_cpu(*(__le32 *)log_entry->LogData); + + if (le16_to_cpu(log_entry->LogEntryQualifier) + != MPT2_WARPDRIVE_LOGENTRY) + break; + +- switch (le32_to_cpu(*log_code)) { ++ switch (log_code) { + case MPT2_WARPDRIVE_LC_SSDT: + ioc_warn(ioc, "WarpDrive Warning: IO Throttling has occurred in the WarpDrive subsystem. Check WarpDrive documentation for additional details.\n"); + break; diff --git a/patches.suse/scsi-mpt3sas-Fix-ioc-base_readl-use.patch b/patches.suse/scsi-mpt3sas-Fix-ioc-base_readl-use.patch new file mode 100644 index 0000000..09a1c81 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Fix-ioc-base_readl-use.patch @@ -0,0 +1,57 @@ +From: Damien Le Moal +Date: Tue, 8 Mar 2022 08:48:52 +0900 +Subject: scsi: mpt3sas: Fix ioc->base_readl() use +Patch-mainline: v5.19-rc1 +Git-commit: 7ab4d2441b952977556672c2fe3f4c2a698cbb37 +References: jsc#PED_1491 + +The functions _base_readl_aero() and _base_readl() used for an adapter +base_readl() method are implemented using a regular readl() call which +internally performs a conversion to CPU endianness (le32_to_cpu()) of +the values read. The users of the ioc base_readl() method should thus +not convert again the values read using le16_to_cpu(). +Fixing this removes sparse warnings. + +Link: https://lore.kernel.org/r/20220307234854.148145-4-damien.lemoal@opensource.wdc.com +Signed-off-by: Damien Le Moal +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -6912,16 +6912,16 @@ _base_handshake_req_reply_wait(struct MP + } + + /* read the first two 16-bits, it gives the total length of the reply */ +- reply[0] = le16_to_cpu(ioc->base_readl(&ioc->chip->Doorbell) +- & MPI2_DOORBELL_DATA_MASK); ++ reply[0] = ioc->base_readl(&ioc->chip->Doorbell) ++ & MPI2_DOORBELL_DATA_MASK; + writel(0, &ioc->chip->HostInterruptStatus); + if ((_base_wait_for_doorbell_int(ioc, 5))) { + ioc_err(ioc, "doorbell handshake int failed (line=%d)\n", + __LINE__); + return -EFAULT; + } +- reply[1] = le16_to_cpu(ioc->base_readl(&ioc->chip->Doorbell) +- & MPI2_DOORBELL_DATA_MASK); ++ reply[1] = ioc->base_readl(&ioc->chip->Doorbell) ++ & MPI2_DOORBELL_DATA_MASK; + writel(0, &ioc->chip->HostInterruptStatus); + + for (i = 2; i < default_reply->MsgLength * 2; i++) { +@@ -6933,9 +6933,8 @@ _base_handshake_req_reply_wait(struct MP + if (i >= reply_bytes/2) /* overflow case */ + ioc->base_readl(&ioc->chip->Doorbell); + else +- reply[i] = le16_to_cpu( +- ioc->base_readl(&ioc->chip->Doorbell) +- & MPI2_DOORBELL_DATA_MASK); ++ reply[i] = ioc->base_readl(&ioc->chip->Doorbell) ++ & MPI2_DOORBELL_DATA_MASK; + writel(0, &ioc->chip->HostInterruptStatus); + } + diff --git a/patches.suse/scsi-mpt3sas-Fix-junk-chars-displayed-while-printing.patch b/patches.suse/scsi-mpt3sas-Fix-junk-chars-displayed-while-printing.patch new file mode 100644 index 0000000..6833b2f --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Fix-junk-chars-displayed-while-printing.patch @@ -0,0 +1,29 @@ +From: Sreekanth Reddy +Date: Wed, 11 May 2022 12:56:20 +0530 +Subject: scsi: mpt3sas: Fix junk chars displayed while printing ChipName +Patch-mainline: v5.19-rc1 +Git-commit: 8e129add48e0c35a8f1bf5df91f50a438dd75ba6 +References: jsc#PED_1491 + +Terminate string after copying 16 bytes of ChipName data from Manufacturing +Page0 to prevent %s from printing junk characters. + +Link: https://lore.kernel.org/r/20220511072621.30657-1-sreekanth.reddy@broadcom.com +Signed-off-by: Sreekanth Reddy +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -4753,7 +4753,7 @@ static void + _base_display_ioc_capabilities(struct MPT3SAS_ADAPTER *ioc) + { + int i = 0; +- char desc[16]; ++ char desc[17] = {0}; + u32 iounit_pg1_flags; + u32 bios_version; + diff --git a/patches.suse/scsi-mpt3sas-Fix-mpt3sas_check_same_4gb_region-kdoc-.patch b/patches.suse/scsi-mpt3sas-Fix-mpt3sas_check_same_4gb_region-kdoc-.patch new file mode 100644 index 0000000..7825eb4 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Fix-mpt3sas_check_same_4gb_region-kdoc-.patch @@ -0,0 +1,46 @@ +From: Damien Le Moal +Date: Mon, 4 Apr 2022 14:00:41 +0900 +Subject: scsi: mpt3sas: Fix mpt3sas_check_same_4gb_region() kdoc comment +Patch-mainline: v5.18-rc2 +Git-commit: 6eaa77144b90582cef7f1fc346f11df51f9f83d5 +References: jsc#PED_1491 + +The start_addres argument of mpt3sas_check_same_4gb_region() was misnamed +in the function kdoc comment, resulting in the following warning when +compiling with W=1. 
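As background, with W=1 the build also runs the kernel-doc checker, which requires every @tag in a /** ... */ block to name a parameter that actually appears in the function signature. A minimal well-formed sketch of such a comment follows — the function name and body are illustrative only, not the driver's code:

#include <linux/types.h>

/**
 * example_same_4gb_region - check whether a DMA buffer stays in one 4GB window
 * @start_address: base DMA address of the buffer
 * @pool_sz: size of the buffer in bytes
 *
 * Return: 1 if the first and last byte share the same upper 32 bits, else 0.
 */
static int example_same_4gb_region(dma_addr_t start_address, u32 pool_sz)
{
	u64 start = (u64)start_address;
	u64 end = start + pool_sz - 1;

	/* Upper 32 bits must match for the buffer to sit in one 4GB window. */
	return (start >> 32) == (end >> 32);
}

The warnings emitted for the mismatched argument name were: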
+ +drivers/scsi/mpt3sas/mpt3sas_base.c:5728: warning: Function parameter or +member 'start_address' not described in 'mpt3sas_check_same_4gb_region' +drivers/scsi/mpt3sas/mpt3sas_base.c:5728: warning: Excess function +parameter 'reply_pool_start_address' description in +'mpt3sas_check_same_4gb_region' + +Fix the argument name in the function kdoc comment to avoid it. While at +it, remove a useless blank line between the kdoc and function code. + +Link: https://lore.kernel.org/r/20220404050041.594774-1-damien.lemoal@opensource.wdc.com +Acked-by: Sreekanth Reddy +Signed-off-by: Damien Le Moal +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -5716,13 +5716,12 @@ _base_release_memory_pools(struct MPT3SA + /** + * mpt3sas_check_same_4gb_region - checks whether all reply queues in a set are + * having same upper 32bits in their base memory address. +- * @reply_pool_start_address: Base address of a reply queue set ++ * @start_address: Base address of a reply queue set + * @pool_sz: Size of single Reply Descriptor Post Queues pool size + * + * Return: 1 if reply queues in a set have a same upper 32bits in their base + * memory address, else 0. + */ +- + static int + mpt3sas_check_same_4gb_region(dma_addr_t start_address, u32 pool_sz) + { diff --git a/patches.suse/scsi-mpt3sas-Fix-out-of-bounds-compiler-warning.patch b/patches.suse/scsi-mpt3sas-Fix-out-of-bounds-compiler-warning.patch new file mode 100644 index 0000000..19b45f5 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Fix-out-of-bounds-compiler-warning.patch @@ -0,0 +1,87 @@ +From: Helge Deller +Date: Tue, 31 May 2022 22:09:27 +0200 +Subject: scsi: mpt3sas: Fix out-of-bounds compiler warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.19-rc2 +Git-commit: 120f1d95efb1cdb6fe023c84e38ba06d8f78cd03 +References: jsc#PED_1491 + +I'm facing this warning when building for the parisc64 architecture: + +drivers/scsi/mpt3sas/mpt3sas_base.c: In function ‘_base_make_ioc_operational’: +drivers/scsi/mpt3sas/mpt3sas_base.c:5396:40: warning: array subscript ‘Mpi2SasIOUnitPage1_t {aka struct _MPI2_CONFIG_PAGE_SASIOUNIT_1}[0]’ is partly outside array bounds of ‘unsigned char[20]’ [-Warray-bounds] + 5396 | (le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth)) ? +drivers/scsi/mpt3sas/mpt3sas_base.c:5382:26: note: referencing an object of size 20 allocated by ‘kzalloc’ + 5382 | sas_iounit_pg1 = kzalloc(sz, GFP_KERNEL); + | ^~~~~~~~~~~~~~~~~~~~~~~ + +The problem is, that only 20 bytes are allocated with kmalloc(), which is +sufficient to hold the bytes which are needed. Nevertheless, gcc complains +because the whole Mpi2SasIOUnitPage1_t struct is 32 bytes in size and thus +doesn't fit into those 20 bytes. + +This patch simply allocates all 32 bytes (instead of 20) and thus avoids +the warning. There is no functional change introduced by this patch. + +While touching the code I cleaned up to calculation of max_wideport_qd, +max_narrowport_qd and max_sata_qd to make it easier readable. + +Test successfully tested on a HP C8000 PA-RISC workstation with 64-bit +kernel. + +Link: https://lore.kernel.org/r/YpZ197iZdDZSCzrT@p100 +Signed-off-by: Helge Deller +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -5369,6 +5369,7 @@ static int _base_assign_fw_reported_qd(s + Mpi2ConfigReply_t mpi_reply; + Mpi2SasIOUnitPage1_t *sas_iounit_pg1 = NULL; + Mpi26PCIeIOUnitPage1_t pcie_iounit_pg1; ++ u16 depth; + int sz; + int rc = 0; + +@@ -5380,7 +5381,7 @@ static int _base_assign_fw_reported_qd(s + goto out; + /* sas iounit page 1 */ + sz = offsetof(Mpi2SasIOUnitPage1_t, PhyData); +- sas_iounit_pg1 = kzalloc(sz, GFP_KERNEL); ++ sas_iounit_pg1 = kzalloc(sizeof(Mpi2SasIOUnitPage1_t), GFP_KERNEL); + if (!sas_iounit_pg1) { + pr_err("%s: failure at %s:%d/%s()!\n", + ioc->name, __FILE__, __LINE__, __func__); +@@ -5393,16 +5394,16 @@ static int _base_assign_fw_reported_qd(s + ioc->name, __FILE__, __LINE__, __func__); + goto out; + } +- ioc->max_wideport_qd = +- (le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth)) ? +- le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth) : +- MPT3SAS_SAS_QUEUE_DEPTH; +- ioc->max_narrowport_qd = +- (le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth)) ? +- le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth) : +- MPT3SAS_SAS_QUEUE_DEPTH; +- ioc->max_sata_qd = (sas_iounit_pg1->SATAMaxQDepth) ? +- sas_iounit_pg1->SATAMaxQDepth : MPT3SAS_SATA_QUEUE_DEPTH; ++ ++ depth = le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth); ++ ioc->max_wideport_qd = (depth ? depth : MPT3SAS_SAS_QUEUE_DEPTH); ++ ++ depth = le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth); ++ ioc->max_narrowport_qd = (depth ? depth : MPT3SAS_SAS_QUEUE_DEPTH); ++ ++ depth = sas_iounit_pg1->SATAMaxQDepth; ++ ioc->max_sata_qd = (depth ? depth : MPT3SAS_SATA_QUEUE_DEPTH); ++ + /* pcie iounit page 1 */ + rc = mpt3sas_config_get_pcie_iounit_pg1(ioc, &mpi_reply, + &pcie_iounit_pg1, sizeof(Mpi26PCIeIOUnitPage1_t)); diff --git a/patches.suse/scsi-mpt3sas-Fix-trace-buffer-registration-failed.patch b/patches.suse/scsi-mpt3sas-Fix-trace-buffer-registration-failed.patch new file mode 100644 index 0000000..123766d --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Fix-trace-buffer-registration-failed.patch @@ -0,0 +1,56 @@ +From: Sreekanth Reddy +Date: Thu, 25 Aug 2022 13:24:55 +0530 +Subject: scsi: mpt3sas: Fix trace buffer registration failed +Patch-mainline: v6.1-rc1 +Git-commit: 463e683bfdc457ce0a15c2c920ed30d3145ed44e +References: jsc#PED_1491 + +The ExtendedType field was set to 1 in the diag buffer register command and +hence MPT Endpoint firmware is failing the request with Invalid Field +IOCStatus. + +memset the request frame to zero before framing the diag buffer register +command. + +Link: https://lore.kernel.org/r/20220825075457.16422-3-sreekanth.reddy@broadcom.com +Signed-off-by: Sreekanth Reddy +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_ctl.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c +@@ -1694,6 +1694,7 @@ _ctl_diag_register_2(struct MPT3SAS_ADAP + ioc->ctl_cmds.status = MPT3_CMD_PENDING; + memset(ioc->ctl_cmds.reply, 0, ioc->reply_sz); + mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); ++ memset(mpi_request, 0, ioc->request_sz); + ioc->ctl_cmds.smid = smid; + + request_data = ioc->diag_buffer[buffer_type]; +@@ -1795,6 +1796,7 @@ _ctl_diag_register_2(struct MPT3SAS_ADAP + if (rc && request_data) { + dma_free_coherent(&ioc->pdev->dev, request_data_sz, + request_data, request_data_dma); ++ ioc->diag_buffer[buffer_type] = NULL; + ioc->diag_buffer_status[buffer_type] &= + ~MPT3_DIAG_BUFFER_IS_DRIVER_ALLOCATED; + } +@@ -2171,6 +2173,7 @@ mpt3sas_send_diag_release(struct MPT3SAS + ioc->ctl_cmds.status = MPT3_CMD_PENDING; + memset(ioc->ctl_cmds.reply, 0, ioc->reply_sz); + mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); ++ memset(mpi_request, 0, ioc->request_sz); + ioc->ctl_cmds.smid = smid; + + mpi_request->Function = MPI2_FUNCTION_DIAG_RELEASE; +@@ -2425,6 +2428,7 @@ _ctl_diag_read_buffer(struct MPT3SAS_ADA + ioc->ctl_cmds.status = MPT3_CMD_PENDING; + memset(ioc->ctl_cmds.reply, 0, ioc->reply_sz); + mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); ++ memset(mpi_request, 0, ioc->request_sz); + ioc->ctl_cmds.smid = smid; + + mpi_request->Function = MPI2_FUNCTION_DIAG_BUFFER_POST; diff --git a/patches.suse/scsi-mpt3sas-Fix-typo-in-comment.patch b/patches.suse/scsi-mpt3sas-Fix-typo-in-comment.patch new file mode 100644 index 0000000..0c3b419 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Fix-typo-in-comment.patch @@ -0,0 +1,28 @@ +From: Ren Yu +Date: Fri, 17 Jun 2022 16:15:57 +0800 +Subject: scsi: mpt3sas: Fix typo in comment +Patch-mainline: v6.0-rc1 +Git-commit: 13d2d3428768c63259b6bf1d0940fc0c5cc866d7 +References: jsc#PED_1491 + +Spelling mistake in comment: non-succesfull -> non-successful. + +Link: https://lore.kernel.org/r/20220617081557.9009-1-renyu@nfschina.com +Signed-off-by: Ren Yu +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_scsih.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c +@@ -5294,7 +5294,7 @@ _scsih_normalize_sense(char *sense_buffe + } + + /** +- * _scsih_scsi_ioc_info - translated non-succesfull SCSI_IO request ++ * _scsih_scsi_ioc_info - translated non-successful SCSI_IO request + * @ioc: per adapter object + * @scmd: pointer to scsi command object + * @mpi_reply: reply mf payload returned from firmware diff --git a/patches.suse/scsi-mpt3sas-Fix-whitespace-and-spelling-mistake.patch b/patches.suse/scsi-mpt3sas-Fix-whitespace-and-spelling-mistake.patch new file mode 100644 index 0000000..2422029 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Fix-whitespace-and-spelling-mistake.patch @@ -0,0 +1,58 @@ +From: Zhang Jiaming +Date: Fri, 17 Jun 2022 18:11:03 +0800 +Subject: scsi: mpt3sas: Fix whitespace and spelling mistake +Patch-mainline: v6.0-rc1 +Git-commit: efef34cb4545d55c9ac9ccd501a6ce562e737736 +References: jsc#PED_1491 + +There is a spelling mistake in _base_sas_ioc_info(). Change 'cant' to +'can't'. + +Also fix up whitespace. + +Link: https://lore.kernel.org/r/20220617101103.3162-1-jiaming@nfschina.com +Signed-off-by: Zhang Jiaming +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -873,7 +873,7 @@ mpt3sas_base_stop_watchdog(struct MPT3SA + * @fault_code: fault code + */ + void +-mpt3sas_base_fault_info(struct MPT3SAS_ADAPTER *ioc , u16 fault_code) ++mpt3sas_base_fault_info(struct MPT3SAS_ADAPTER *ioc, u16 fault_code) + { + ioc_err(ioc, "fault_state(0x%04x)!\n", fault_code); + } +@@ -1057,7 +1057,7 @@ _base_sas_ioc_info(struct MPT3SAS_ADAPTE + desc = "config no defaults"; + break; + case MPI2_IOCSTATUS_CONFIG_CANT_COMMIT: +- desc = "config cant commit"; ++ desc = "config can't commit"; + break; + + /**************************************************************************** +@@ -1321,7 +1321,7 @@ _base_display_event_data(struct MPT3SAS_ + * @log_info: log info + */ + static void +-_base_sas_log_info(struct MPT3SAS_ADAPTER *ioc , u32 log_info) ++_base_sas_log_info(struct MPT3SAS_ADAPTER *ioc, u32 log_info) + { + union loginfo_type { + u32 loginfo; +@@ -1393,7 +1393,7 @@ _base_display_reply_info(struct MPT3SAS_ + + if ((ioc_status & MPI2_IOCSTATUS_MASK) && + (ioc->logging_level & MPT_DEBUG_REPLY)) { +- _base_sas_ioc_info(ioc , mpi_reply, ++ _base_sas_ioc_info(ioc, mpi_reply, + mpt3sas_base_get_msg_frame(ioc, smid)); + } + diff --git a/patches.suse/scsi-mpt3sas-Fix-writel-use.patch b/patches.suse/scsi-mpt3sas-Fix-writel-use.patch new file mode 100644 index 0000000..910d720 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Fix-writel-use.patch @@ -0,0 +1,69 @@ +From: Damien Le Moal +Date: Tue, 8 Mar 2022 08:48:51 +0900 +Subject: scsi: mpt3sas: Fix writel() use +Patch-mainline: v5.19-rc1 +Git-commit: b4efbec4c2a75b619fae4e8768be379e88c78687 +References: jsc#PED_1491 + +writel() internally executes cpu_to_le32() to convert the value being +written to little endian. The caller should thus not use this conversion +function for the value passed to writel(). Remove the cpu_to_le32() calls +in _base_put_smid_scsi_io_atomic(), _base_put_smid_fast_path_atomic(), +_base_put_smid_hi_priority_atomic() _base_put_smid_default_atomic() and +_base_handshake_req_reply_wait(). + +Link: https://lore.kernel.org/r/20220307234854.148145-3-damien.lemoal@opensource.wdc.com +Signed-off-by: Damien Le Moal +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -4312,7 +4312,7 @@ _base_put_smid_scsi_io_atomic(struct MPT + descriptor.MSIxIndex = _base_set_and_get_msix_index(ioc, smid); + descriptor.SMID = cpu_to_le16(smid); + +- writel(cpu_to_le32(*request), &ioc->chip->AtomicRequestDescriptorPost); ++ writel(*request, &ioc->chip->AtomicRequestDescriptorPost); + } + + /** +@@ -4334,7 +4334,7 @@ _base_put_smid_fast_path_atomic(struct M + descriptor.MSIxIndex = _base_set_and_get_msix_index(ioc, smid); + descriptor.SMID = cpu_to_le16(smid); + +- writel(cpu_to_le32(*request), &ioc->chip->AtomicRequestDescriptorPost); ++ writel(*request, &ioc->chip->AtomicRequestDescriptorPost); + } + + /** +@@ -4357,7 +4357,7 @@ _base_put_smid_hi_priority_atomic(struct + descriptor.MSIxIndex = msix_task; + descriptor.SMID = cpu_to_le16(smid); + +- writel(cpu_to_le32(*request), &ioc->chip->AtomicRequestDescriptorPost); ++ writel(*request, &ioc->chip->AtomicRequestDescriptorPost); + } + + /** +@@ -4378,7 +4378,7 @@ _base_put_smid_default_atomic(struct MPT + descriptor.MSIxIndex = _base_set_and_get_msix_index(ioc, smid); + descriptor.SMID = cpu_to_le16(smid); + +- writel(cpu_to_le32(*request), &ioc->chip->AtomicRequestDescriptorPost); ++ writel(*request, &ioc->chip->AtomicRequestDescriptorPost); + } + + /** +@@ -6893,7 +6893,7 @@ _base_handshake_req_reply_wait(struct MP + + /* send message 32-bits at a time */ + for (i = 0, failed = 0; i < request_bytes/4 && !failed; i++) { +- writel(cpu_to_le32(request[i]), &ioc->chip->Doorbell); ++ writel(request[i], &ioc->chip->Doorbell); + if ((_base_wait_for_doorbell_ack(ioc, 5))) + failed = 1; + } diff --git a/patches.suse/scsi-mpt3sas-Increase-cmd_per_lun-to-128.patch b/patches.suse/scsi-mpt3sas-Increase-cmd_per_lun-to-128.patch new file mode 100644 index 0000000..8c82f37 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Increase-cmd_per_lun-to-128.patch @@ -0,0 +1,30 @@ +From: Sreekanth Reddy +Date: Thu, 25 Aug 2022 13:24:56 +0530 +Subject: scsi: mpt3sas: Increase cmd_per_lun to 128 +Patch-mainline: v6.1-rc1 +Git-commit: 669b2b667e69264cb9a914e0afd056abafa2f429 +References: jsc#PED_1491 + +With cmd_per_lun value 7, a higher number of cache lines (map_nr) are +needed while allocating sdev->budget_map which is not reasonable and hence +increase the cmd_per_lun value to 128. + +Link: https://lore.kernel.org/r/20220825075457.16422-4-sreekanth.reddy@broadcom.com +Signed-off-by: Sreekanth Reddy +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_scsih.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c +@@ -11988,7 +11988,7 @@ static struct scsi_host_template mpt3sas + .sg_tablesize = MPT3SAS_SG_DEPTH, + .max_sectors = 32767, + .max_segment_size = 0xffffffff, +- .cmd_per_lun = 7, ++ .cmd_per_lun = 128, + .shost_groups = mpt3sas_host_groups, + .sdev_groups = mpt3sas_dev_groups, + .track_queue_depth = 1, diff --git a/patches.suse/scsi-mpt3sas-Make-mpt3sas_dev_attrs-static.patch b/patches.suse/scsi-mpt3sas-Make-mpt3sas_dev_attrs-static.patch new file mode 100644 index 0000000..b3f4c02 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Make-mpt3sas_dev_attrs-static.patch @@ -0,0 +1,37 @@ +From: Jiapeng Chong +Date: Tue, 19 Oct 2021 18:27:19 +0800 +Subject: scsi: mpt3sas: Make mpt3sas_dev_attrs static +Patch-mainline: v5.16-rc1 +Git-commit: 0ae8f4785107eb7f63ac17d86d894be681427dd2 +References: jsc#PED_1491 + +This symbol is not used outside of mpt3sas_ctl.c, mark it static. + +Fixes the following sparse warning: + +drivers/scsi/mpt3sas/mpt3sas_ctl.c:3988:18: warning: symbol +'mpt3sas_dev_attrs' was not declared. Should it be static? + +Link: https://lore.kernel.org/r/1634639239-2892-1-git-send-email-jiapeng.chong@linux.alibaba.com +Fixes: 1bb3ca27d2ca ("scsi: mpt3sas: Switch to attribute groups") +Reported-by: Abaci Robot +Reviewed-by: Bart Van Assche +Reviewed-by: Himanshu Madhani +Signed-off-by: Jiapeng Chong +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_ctl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c +@@ -3985,7 +3985,7 @@ sas_ncq_prio_enable_store(struct device + } + static DEVICE_ATTR_RW(sas_ncq_prio_enable); + +-struct attribute *mpt3sas_dev_attrs[] = { ++static struct attribute *mpt3sas_dev_attrs[] = { + &dev_attr_sas_address.attr, + &dev_attr_sas_device_handle.attr, + &dev_attr_sas_ncq_prio_supported.attr, diff --git a/patches.suse/scsi-mpt3sas-Prevent-error-handler-escalation-when-d.patch b/patches.suse/scsi-mpt3sas-Prevent-error-handler-escalation-when-d.patch new file mode 100644 index 0000000..2c99f09 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Prevent-error-handler-escalation-when-d.patch @@ -0,0 +1,41 @@ +From: Sreekanth Reddy +Date: Tue, 16 Aug 2022 13:38:01 +0530 +Subject: scsi: mpt3sas: Prevent error handler escalation when device removed +Patch-mainline: v6.1-rc1 +Git-commit: e75c8ea0d73bebdbff3ceb51f55fd735cb232d86 +References: jsc#PED_1491 + +If SCSI error handling is taking place for timed out I/Os on a drive and +the corresponding drive is removed, then stop escalating to higher level of +reset by returning the TUR with "I_T NEXUS LOSS OCCURRED" sense key. + +Link: https://lore.kernel.org/r/20220816080801.13929-1-sreekanth.reddy@broadcom.com +Signed-off-by: Sreekanth Reddy +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_scsih.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c +@@ -5156,6 +5156,19 @@ scsih_qcmd(struct Scsi_Host *shost, stru + + /* invalid device handle */ + handle = sas_target_priv_data->handle; ++ ++ /* ++ * Avoid error handling escallation when device is disconnected ++ */ ++ if (handle == MPT3SAS_INVALID_DEVICE_HANDLE || sas_device_priv_data->block) { ++ if (scmd->device->host->shost_state == SHOST_RECOVERY && ++ scmd->cmnd[0] == TEST_UNIT_READY) { ++ scsi_build_sense(scmd, 0, UNIT_ATTENTION, 0x29, 0x07); ++ scsi_done(scmd); ++ return 0; ++ } ++ } ++ + if (handle == MPT3SAS_INVALID_DEVICE_HANDLE) { + scmd->result = DID_NO_CONNECT << 16; + scsi_done(scmd); diff --git a/patches.suse/scsi-mpt3sas-Remove-flush_scheduled_work-call.patch b/patches.suse/scsi-mpt3sas-Remove-flush_scheduled_work-call.patch new file mode 100644 index 0000000..b9b5e7e --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Remove-flush_scheduled_work-call.patch @@ -0,0 +1,29 @@ +From: Tetsuo Handa +Date: Thu, 9 Jun 2022 22:26:48 +0900 +Subject: scsi: mpt3sas: Remove flush_scheduled_work() call +Patch-mainline: v6.0-rc1 +Git-commit: 90c3ca3f247d1a95fc47232e9f0aef114becd605 +References: jsc#PED_1491 + +It seems to me that mpt3sas driver is using dedicated workqueues and is not +calling schedule{,_delayed}_work{,_on}(). Then, there will be no work to +flush using flush_scheduled_work(). + +Link: https://lore.kernel.org/r/f3b97c7c-1094-4e46-20d8-4321716d6f3f@I-love.SAKURA.ne.jp +Signed-off-by: Tetsuo Handa +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_scsih.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c +@@ -12410,7 +12410,6 @@ scsih_suspend(struct device *dev) + return rc; + + mpt3sas_base_stop_watchdog(ioc); +- flush_scheduled_work(); + scsi_block_requests(shost); + _scsih_nvme_shutdown(ioc); + ioc_info(ioc, "pdev=0x%p, slot=%s, entering operating state\n", diff --git a/patches.suse/scsi-mpt3sas-Remove-scsi_dma_map-error-messages.patch b/patches.suse/scsi-mpt3sas-Remove-scsi_dma_map-error-messages.patch new file mode 100644 index 0000000..78eea86 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Remove-scsi_dma_map-error-messages.patch @@ -0,0 +1,67 @@ +From: Sreekanth Reddy +Date: Thu, 3 Mar 2022 19:32:03 +0530 +Subject: scsi: mpt3sas: Remove scsi_dma_map() error messages +Patch-mainline: v5.18-rc1 +Git-commit: 0c25422d34b4726b2707d5f38560943155a91b80 +References: jsc#PED_1491 + +When scsi_dma_map() fails by returning a sges_left value less than zero, +the amount of logging produced can be extremely high. In a recent end-user +environment, 1200 messages per second were being sent to the log buffer. +This eventually overwhelmed the system and it stalled. + +These error messages are not needed. Remove them. + +Link: https://lore.kernel.org/r/20220303140203.12642-1-sreekanth.reddy@broadcom.com +Suggested-by: Christoph Hellwig +Signed-off-by: Sreekanth Reddy +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 18 +++--------------- + 1 file changed, 3 insertions(+), 15 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -2594,12 +2594,8 @@ _base_check_pcie_native_sgl(struct MPT3S + + /* Get the SG list pointer and info. 
*/ + sges_left = scsi_dma_map(scmd); +- if (sges_left < 0) { +- sdev_printk(KERN_ERR, scmd->device, +- "scsi_dma_map failed: request for %d bytes!\n", +- scsi_bufflen(scmd)); ++ if (sges_left < 0) + return 1; +- } + + /* Check if we need to build a native SG list. */ + if (!base_is_prp_possible(ioc, pcie_device, +@@ -2706,12 +2702,8 @@ _base_build_sg_scmd(struct MPT3SAS_ADAPT + + sg_scmd = scsi_sglist(scmd); + sges_left = scsi_dma_map(scmd); +- if (sges_left < 0) { +- sdev_printk(KERN_ERR, scmd->device, +- "scsi_dma_map failed: request for %d bytes!\n", +- scsi_bufflen(scmd)); ++ if (sges_left < 0) + return -ENOMEM; +- } + + sg_local = &mpi_request->SGL; + sges_in_segment = ioc->max_sges_in_main_message; +@@ -2854,12 +2846,8 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ + + sg_scmd = scsi_sglist(scmd); + sges_left = scsi_dma_map(scmd); +- if (sges_left < 0) { +- sdev_printk(KERN_ERR, scmd->device, +- "scsi_dma_map failed: request for %d bytes!\n", +- scsi_bufflen(scmd)); ++ if (sges_left < 0) + return -ENOMEM; +- } + + sg_local = &mpi_request->SGL; + sges_in_segment = (ioc->request_sz - diff --git a/patches.suse/scsi-mpt3sas-Revert-scsi-mpt3sas-Fix-ioc-base_readl-.patch b/patches.suse/scsi-mpt3sas-Revert-scsi-mpt3sas-Fix-ioc-base_readl-.patch new file mode 100644 index 0000000..3ab9a36 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Revert-scsi-mpt3sas-Fix-ioc-base_readl-.patch @@ -0,0 +1,53 @@ +From: Damien Le Moal +Date: Fri, 16 Sep 2022 22:01:11 +0900 +Subject: scsi: mpt3sas: Revert "scsi: mpt3sas: Fix ioc->base_readl() use" +Patch-mainline: v6.1-rc1 +Git-commit: d82e68483b81768c8d19bc7529635dad741607ce +References: jsc#PED_1491 + +This reverts commit 7ab4d2441b952977556672c2fe3f4c2a698cbb37 as it is +breaking the mpt3sas driver on big-endian machines. + +Link: https://lore.kernel.org/r/20220916130111.168195-3-damien.lemoal@opensource.wdc.com +Signed-off-by: Damien Le Moal +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -7075,16 +7075,16 @@ _base_handshake_req_reply_wait(struct MP + } + + /* read the first two 16-bits, it gives the total length of the reply */ +- reply[0] = ioc->base_readl(&ioc->chip->Doorbell) +- & MPI2_DOORBELL_DATA_MASK; ++ reply[0] = le16_to_cpu(ioc->base_readl(&ioc->chip->Doorbell) ++ & MPI2_DOORBELL_DATA_MASK); + writel(0, &ioc->chip->HostInterruptStatus); + if ((_base_wait_for_doorbell_int(ioc, 5))) { + ioc_err(ioc, "doorbell handshake int failed (line=%d)\n", + __LINE__); + return -EFAULT; + } +- reply[1] = ioc->base_readl(&ioc->chip->Doorbell) +- & MPI2_DOORBELL_DATA_MASK; ++ reply[1] = le16_to_cpu(ioc->base_readl(&ioc->chip->Doorbell) ++ & MPI2_DOORBELL_DATA_MASK); + writel(0, &ioc->chip->HostInterruptStatus); + + for (i = 2; i < default_reply->MsgLength * 2; i++) { +@@ -7096,8 +7096,9 @@ _base_handshake_req_reply_wait(struct MP + if (i >= reply_bytes/2) /* overflow case */ + ioc->base_readl(&ioc->chip->Doorbell); + else +- reply[i] = ioc->base_readl(&ioc->chip->Doorbell) +- & MPI2_DOORBELL_DATA_MASK; ++ reply[i] = le16_to_cpu( ++ ioc->base_readl(&ioc->chip->Doorbell) ++ & MPI2_DOORBELL_DATA_MASK); + writel(0, &ioc->chip->HostInterruptStatus); + } + diff --git a/patches.suse/scsi-mpt3sas-Revert-scsi-mpt3sas-Fix-writel-use.patch b/patches.suse/scsi-mpt3sas-Revert-scsi-mpt3sas-Fix-writel-use.patch new file mode 100644 index 0000000..e44fbb9 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Revert-scsi-mpt3sas-Fix-writel-use.patch @@ -0,0 +1,65 @@ +From: Damien Le Moal +Date: Fri, 16 Sep 2022 22:01:10 +0900 +Subject: scsi: mpt3sas: Revert "scsi: mpt3sas: Fix writel() use" +Patch-mainline: v6.1-rc1 +Git-commit: f920642e406cfa17ebecf03d5b83a02273ec718e +References: jsc#PED_1491 + +This reverts commit b4efbec4c2a75b619fae4e8768be379e88c78687 as it is +breaking the mpt3sas driver on big-endian machines. + +Link: https://lore.kernel.org/r/20220916130111.168195-2-damien.lemoal@opensource.wdc.com +Signed-off-by: Damien Le Moal +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -4313,7 +4313,7 @@ _base_put_smid_scsi_io_atomic(struct MPT + descriptor.MSIxIndex = _base_set_and_get_msix_index(ioc, smid); + descriptor.SMID = cpu_to_le16(smid); + +- writel(*request, &ioc->chip->AtomicRequestDescriptorPost); ++ writel(cpu_to_le32(*request), &ioc->chip->AtomicRequestDescriptorPost); + } + + /** +@@ -4335,7 +4335,7 @@ _base_put_smid_fast_path_atomic(struct M + descriptor.MSIxIndex = _base_set_and_get_msix_index(ioc, smid); + descriptor.SMID = cpu_to_le16(smid); + +- writel(*request, &ioc->chip->AtomicRequestDescriptorPost); ++ writel(cpu_to_le32(*request), &ioc->chip->AtomicRequestDescriptorPost); + } + + /** +@@ -4358,7 +4358,7 @@ _base_put_smid_hi_priority_atomic(struct + descriptor.MSIxIndex = msix_task; + descriptor.SMID = cpu_to_le16(smid); + +- writel(*request, &ioc->chip->AtomicRequestDescriptorPost); ++ writel(cpu_to_le32(*request), &ioc->chip->AtomicRequestDescriptorPost); + } + + /** +@@ -4379,7 +4379,7 @@ _base_put_smid_default_atomic(struct MPT + descriptor.MSIxIndex = _base_set_and_get_msix_index(ioc, smid); + descriptor.SMID = cpu_to_le16(smid); + +- writel(*request, &ioc->chip->AtomicRequestDescriptorPost); ++ writel(cpu_to_le32(*request), &ioc->chip->AtomicRequestDescriptorPost); + } + + /** +@@ -7056,7 +7056,7 @@ _base_handshake_req_reply_wait(struct MP + + /* send message 32-bits at a time */ + for (i = 0, failed = 0; i < request_bytes/4 && !failed; i++) { +- writel(request[i], &ioc->chip->Doorbell); ++ writel(cpu_to_le32(request[i]), &ioc->chip->Doorbell); + if ((_base_wait_for_doorbell_ack(ioc, 5))) + failed = 1; + } diff --git a/patches.suse/scsi-mpt3sas-Update-driver-version-to-42.100.00.00.patch b/patches.suse/scsi-mpt3sas-Update-driver-version-to-42.100.00.00.patch new file mode 100644 index 0000000..34e6720 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Update-driver-version-to-42.100.00.00.patch @@ -0,0 +1,30 @@ +From: Sreekanth Reddy +Date: Wed, 11 May 2022 12:56:21 +0530 +Subject: scsi: mpt3sas: Update driver version to 42.100.00.00 +Patch-mainline: v5.19-rc1 +Git-commit: 53d5088deff64c526ac52b39ce1244ce10372367 +References: jsc#PED_1491 + +Update driver version to 42.100.00.00. + +Link: https://lore.kernel.org/r/20220511072621.30657-2-sreekanth.reddy@broadcom.com +Signed-off-by: Sreekanth Reddy +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.h ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.h +@@ -77,8 +77,8 @@ + #define MPT3SAS_DRIVER_NAME "mpt3sas" + #define MPT3SAS_AUTHOR "Avago Technologies " + #define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver" +-#define MPT3SAS_DRIVER_VERSION "40.100.00.00" +-#define MPT3SAS_MAJOR_VERSION 40 ++#define MPT3SAS_DRIVER_VERSION "42.100.00.00" ++#define MPT3SAS_MAJOR_VERSION 42 + #define MPT3SAS_MINOR_VERSION 100 + #define MPT3SAS_BUILD_VERSION 0 + #define MPT3SAS_RELEASE_VERSION 00 diff --git a/patches.suse/scsi-mpt3sas-Update-driver-version-to-43.100.00.00.patch b/patches.suse/scsi-mpt3sas-Update-driver-version-to-43.100.00.00.patch new file mode 100644 index 0000000..383cd5d --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Update-driver-version-to-43.100.00.00.patch @@ -0,0 +1,30 @@ +From: Sreekanth Reddy +Date: Thu, 25 Aug 2022 13:24:57 +0530 +Subject: scsi: mpt3sas: Update driver version to 43.100.00.00 +Patch-mainline: v6.1-rc1 +Git-commit: c0958d2335fe7327bdda294a1ed42debe14d0346 +References: jsc#PED_1491 + +Update driver version to 43.100.00.00. + +Link: https://lore.kernel.org/r/20220825075457.16422-5-sreekanth.reddy@broadcom.com +Signed-off-by: Sreekanth Reddy +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.h ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.h +@@ -77,8 +77,8 @@ + #define MPT3SAS_DRIVER_NAME "mpt3sas" + #define MPT3SAS_AUTHOR "Avago Technologies " + #define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver" +-#define MPT3SAS_DRIVER_VERSION "42.100.00.00" +-#define MPT3SAS_MAJOR_VERSION 42 ++#define MPT3SAS_DRIVER_VERSION "43.100.00.00" ++#define MPT3SAS_MAJOR_VERSION 43 + #define MPT3SAS_MINOR_VERSION 100 + #define MPT3SAS_BUILD_VERSION 0 + #define MPT3SAS_RELEASE_VERSION 00 diff --git a/patches.suse/scsi-mpt3sas-Update-persistent-trigger-pages-from-sy.patch b/patches.suse/scsi-mpt3sas-Update-persistent-trigger-pages-from-sy.patch new file mode 100644 index 0000000..42531f7 --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Update-persistent-trigger-pages-from-sy.patch @@ -0,0 +1,166 @@ +From: Suganath Prabu S +Date: Mon, 27 Dec 2021 11:00:55 +0530 +Subject: scsi: mpt3sas: Update persistent trigger pages from sysfs interface +Patch-mainline: v5.17-rc1 +Git-commit: 9211faa39a0350fb2239a0bce03b9459cd14fc40 +References: jsc#PED_1491 + +Store sysfs-provided trigger values into the corresponding persistent +trigger pages. Otherwise trigger entries are not persistent across system +reboots. + +Link: https://lore.kernel.org/r/20211227053055.289537-1-suganath-prabu.subramani@broadcom.com +Signed-off-by: Suganath Prabu S +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.h | 4 - + drivers/scsi/mpt3sas/mpt3sas_ctl.c | 87 ++++++++++++++++++++++++++++++++++-- + 2 files changed, 85 insertions(+), 6 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.h ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.h +@@ -77,8 +77,8 @@ + #define MPT3SAS_DRIVER_NAME "mpt3sas" + #define MPT3SAS_AUTHOR "Avago Technologies " + #define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver" +-#define MPT3SAS_DRIVER_VERSION "39.100.00.00" +-#define MPT3SAS_MAJOR_VERSION 39 ++#define MPT3SAS_DRIVER_VERSION "40.100.00.00" ++#define MPT3SAS_MAJOR_VERSION 40 + #define MPT3SAS_MINOR_VERSION 100 + #define MPT3SAS_BUILD_VERSION 0 + #define MPT3SAS_RELEASE_VERSION 00 +--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c +@@ -3533,11 +3533,31 @@ diag_trigger_master_store(struct device + { + struct Scsi_Host *shost = class_to_shost(cdev); + struct MPT3SAS_ADAPTER *ioc = shost_priv(shost); ++ struct SL_WH_MASTER_TRIGGER_T *master_tg; + unsigned long flags; + ssize_t rc; ++ bool set = 1; + +- spin_lock_irqsave(&ioc->diag_trigger_lock, flags); + rc = min(sizeof(struct SL_WH_MASTER_TRIGGER_T), count); ++ ++ if (ioc->supports_trigger_pages) { ++ master_tg = kzalloc(sizeof(struct SL_WH_MASTER_TRIGGER_T), ++ GFP_KERNEL); ++ if (!master_tg) ++ return -ENOMEM; ++ ++ memcpy(master_tg, buf, rc); ++ if (!master_tg->MasterData) ++ set = 0; ++ if (mpt3sas_config_update_driver_trigger_pg1(ioc, master_tg, ++ set)) { ++ kfree(master_tg); ++ return -EFAULT; ++ } ++ kfree(master_tg); ++ } ++ ++ spin_lock_irqsave(&ioc->diag_trigger_lock, flags); + memset(&ioc->diag_trigger_master, 0, + sizeof(struct SL_WH_MASTER_TRIGGER_T)); + memcpy(&ioc->diag_trigger_master, buf, rc); +@@ -3589,11 +3609,31 @@ diag_trigger_event_store(struct device * + { + struct Scsi_Host *shost = class_to_shost(cdev); + struct MPT3SAS_ADAPTER *ioc = shost_priv(shost); ++ struct SL_WH_EVENT_TRIGGERS_T *event_tg; + unsigned long flags; + ssize_t sz; ++ bool set = 1; + +- spin_lock_irqsave(&ioc->diag_trigger_lock, flags); + sz = min(sizeof(struct SL_WH_EVENT_TRIGGERS_T), count); ++ if (ioc->supports_trigger_pages) { ++ event_tg = kzalloc(sizeof(struct SL_WH_EVENT_TRIGGERS_T), ++ GFP_KERNEL); ++ if (!event_tg) ++ return -ENOMEM; ++ ++ memcpy(event_tg, buf, sz); ++ if (!event_tg->ValidEntries) ++ set = 0; ++ if (mpt3sas_config_update_driver_trigger_pg2(ioc, event_tg, ++ set)) { ++ kfree(event_tg); ++ return -EFAULT; ++ } ++ kfree(event_tg); ++ } ++ ++ spin_lock_irqsave(&ioc->diag_trigger_lock, flags); ++ + memset(&ioc->diag_trigger_event, 0, + sizeof(struct SL_WH_EVENT_TRIGGERS_T)); + memcpy(&ioc->diag_trigger_event, buf, sz); +@@ -3644,11 +3684,31 @@ diag_trigger_scsi_store(struct device *c + { + struct Scsi_Host *shost = class_to_shost(cdev); + struct MPT3SAS_ADAPTER *ioc = shost_priv(shost); ++ struct SL_WH_SCSI_TRIGGERS_T *scsi_tg; + unsigned long flags; + ssize_t sz; ++ bool set = 1; ++ ++ sz = min(sizeof(struct SL_WH_SCSI_TRIGGERS_T), count); ++ if (ioc->supports_trigger_pages) { ++ scsi_tg = kzalloc(sizeof(struct SL_WH_SCSI_TRIGGERS_T), ++ GFP_KERNEL); ++ if (!scsi_tg) ++ return -ENOMEM; ++ ++ memcpy(scsi_tg, buf, sz); ++ if (!scsi_tg->ValidEntries) ++ set = 0; ++ if (mpt3sas_config_update_driver_trigger_pg3(ioc, scsi_tg, ++ set)) { ++ kfree(scsi_tg); ++ return -EFAULT; ++ } ++ kfree(scsi_tg); ++ } + + spin_lock_irqsave(&ioc->diag_trigger_lock, flags); +- sz = min(sizeof(ioc->diag_trigger_scsi), count); ++ + 
memset(&ioc->diag_trigger_scsi, 0, sizeof(ioc->diag_trigger_scsi)); + memcpy(&ioc->diag_trigger_scsi, buf, sz); + if (ioc->diag_trigger_scsi.ValidEntries > NUM_VALID_ENTRIES) +@@ -3698,11 +3758,30 @@ diag_trigger_mpi_store(struct device *cd + { + struct Scsi_Host *shost = class_to_shost(cdev); + struct MPT3SAS_ADAPTER *ioc = shost_priv(shost); ++ struct SL_WH_MPI_TRIGGERS_T *mpi_tg; + unsigned long flags; + ssize_t sz; ++ bool set = 1; + +- spin_lock_irqsave(&ioc->diag_trigger_lock, flags); + sz = min(sizeof(struct SL_WH_MPI_TRIGGERS_T), count); ++ if (ioc->supports_trigger_pages) { ++ mpi_tg = kzalloc(sizeof(struct SL_WH_MPI_TRIGGERS_T), ++ GFP_KERNEL); ++ if (!mpi_tg) ++ return -ENOMEM; ++ ++ memcpy(mpi_tg, buf, sz); ++ if (!mpi_tg->ValidEntries) ++ set = 0; ++ if (mpt3sas_config_update_driver_trigger_pg4(ioc, mpi_tg, ++ set)) { ++ kfree(mpi_tg); ++ return -EFAULT; ++ } ++ kfree(mpi_tg); ++ } ++ ++ spin_lock_irqsave(&ioc->diag_trigger_lock, flags); + memset(&ioc->diag_trigger_mpi, 0, + sizeof(ioc->diag_trigger_mpi)); + memcpy(&ioc->diag_trigger_mpi, buf, sz); diff --git a/patches.suse/scsi-mpt3sas-Use-irq_set_affinity_and_hint.patch b/patches.suse/scsi-mpt3sas-Use-irq_set_affinity_and_hint.patch new file mode 100644 index 0000000..5db427e --- /dev/null +++ b/patches.suse/scsi-mpt3sas-Use-irq_set_affinity_and_hint.patch @@ -0,0 +1,96 @@ +From: Nitesh Narayan Lal +Date: Fri, 3 Sep 2021 11:24:21 -0400 +Subject: scsi: mpt3sas: Use irq_set_affinity_and_hint() +Patch-mainline: v5.17-rc1 +Git-commit: fdb8ed13a77270c8e6e05b3ff9f4cb2f57e16d6a +References: jsc#PED_1491 + +The driver uses irq_set_affinity_hint() specifically for the high IOPS +queue interrupts for two purposes: + + - To set the affinity_hint which is consumed by the userspace for + distributing the interrupts + + - To apply an affinity that it provides + +The driver enforces its own affinity to bind the high IOPS queue interrupts +to the local NUMA node. However, irq_set_affinity_hint() applying the +provided cpumask as an affinity (if not NULL) for the interrupt is an +undocumented side effect. + +To remove this side effect irq_set_affinity_hint() has been marked +as deprecated and new interfaces have been introduced. Hence, replace the +irq_set_affinity_hint() with the new interface irq_set_affinity_and_hint() +where the provided mask needs to be applied as the affinity and +affinity_hint pointer needs to be set and replace with +irq_update_affinity_hint() where only affinity_hint needs to be updated. 
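The same conversion pattern applies to any driver that relied on the old side effect. A minimal sketch, assuming a PCI device whose first few vectors should be pinned to the local NUMA node — the function names and queue count below are hypothetical, not taken from mpt3sas:

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/topology.h>

/* Hypothetical setup path: pin the first nr_queues vectors to the local node. */
static void example_pin_high_iops_irqs(struct pci_dev *pdev,
				       unsigned int nr_queues)
{
	const struct cpumask *mask = cpumask_of_node(dev_to_node(&pdev->dev));
	unsigned int i;

	for (i = 0; i < nr_queues; i++) {
		/* Applies the affinity and publishes it as the hint, explicitly. */
		irq_set_affinity_and_hint(pci_irq_vector(pdev, i), mask);
	}
}

/* Hypothetical teardown path: clear only the published hint. */
static void example_unpin_irq(struct pci_dev *pdev, unsigned int index)
{
	irq_update_affinity_hint(pci_irq_vector(pdev, index), NULL);
}

The point of the split interfaces is that applying an affinity and publishing the hint become two separate, documented operations rather than one call with a hidden side effect.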
+ +Signed-off-by: Nitesh Narayan Lal +Signed-off-by: Thomas Gleixner +Reviewed-by: Sreekanth Reddy +Link: https://lore.kernel.org/r/20210903152430.244937-6-nitesh@redhat.com +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 21 ++++++++++----------- + 1 file changed, 10 insertions(+), 11 deletions(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -3086,6 +3086,7 @@ _base_check_enable_msix(struct MPT3SAS_A + void + mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc) + { ++ unsigned int irq; + struct adapter_reply_queue *reply_q, *next; + + if (list_empty(&ioc->reply_queue_list)) +@@ -3098,9 +3099,10 @@ mpt3sas_base_free_irq(struct MPT3SAS_ADA + continue; + } + +- if (ioc->smp_affinity_enable) +- irq_set_affinity_hint(pci_irq_vector(ioc->pdev, +- reply_q->msix_index), NULL); ++ if (ioc->smp_affinity_enable) { ++ irq = pci_irq_vector(ioc->pdev, reply_q->msix_index); ++ irq_update_affinity_hint(irq, NULL); ++ } + free_irq(pci_irq_vector(ioc->pdev, reply_q->msix_index), + reply_q); + kfree(reply_q); +@@ -3167,18 +3169,15 @@ out: + * @ioc: per adapter object + * + * The enduser would need to set the affinity via /proc/irq/#/smp_affinity +- * +- * It would nice if we could call irq_set_affinity, however it is not +- * an exported symbol + */ + static void + _base_assign_reply_queues(struct MPT3SAS_ADAPTER *ioc) + { +- unsigned int cpu, nr_cpus, nr_msix, index = 0; ++ unsigned int cpu, nr_cpus, nr_msix, index = 0, irq; + struct adapter_reply_queue *reply_q; +- int local_numa_node; + int iopoll_q_count = ioc->reply_queue_count - + ioc->iopoll_q_start_index; ++ const struct cpumask *mask; + + if (!_base_is_controller_msix_enabled(ioc)) + return; +@@ -3201,11 +3200,11 @@ _base_assign_reply_queues(struct MPT3SAS + * corresponding to high iops queues. + */ + if (ioc->high_iops_queues) { +- local_numa_node = dev_to_node(&ioc->pdev->dev); ++ mask = cpumask_of_node(dev_to_node(&ioc->pdev->dev)); + for (index = 0; index < ioc->high_iops_queues; + index++) { +- irq_set_affinity_hint(pci_irq_vector(ioc->pdev, +- index), cpumask_of_node(local_numa_node)); ++ irq = pci_irq_vector(ioc->pdev, index); ++ irq_set_affinity_and_hint(irq, mask); + } + } + diff --git a/patches.suse/scsi-mpt3sas-re-do-lost-mpt3sas-DMA-mask-fix.patch b/patches.suse/scsi-mpt3sas-re-do-lost-mpt3sas-DMA-mask-fix.patch new file mode 100644 index 0000000..f5d37ac --- /dev/null +++ b/patches.suse/scsi-mpt3sas-re-do-lost-mpt3sas-DMA-mask-fix.patch @@ -0,0 +1,48 @@ +From: Sreekanth Reddy +Date: Tue, 13 Sep 2022 17:35:38 +0530 +Subject: scsi: mpt3sas: re-do lost mpt3sas DMA mask fix +Patch-mainline: v6.1-rc3 +Git-commit: 1a2dcbdde82e3a5f1db9b2f4c48aa1aeba534fb2 +References: jsc#PED_1491 + +This is a re-do of commit e0e0747de0ea ("scsi: mpt3sas: Fix return value +check of dma_get_required_mask()"), which I ended up undoing in a +mis-merge in commit 62e6e5940c0c ("Merge tag 'scsi-misc' of +git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi"). + +The original commit message was + + scsi: mpt3sas: Fix return value check of dma_get_required_mask() + + Fix the incorrect return value check of dma_get_required_mask(). Due to + this incorrect check, the driver was always setting the DMA mask to 63 bit. + + Link: https://lore.kernel.org/r/20220913120538.18759-2-sreekanth.reddy@broadcom.com + Fixes: ba27c5cf286d ("scsi: mpt3sas: Don't change the DMA coherent mask after allocations") + Signed-off-by: Sreekanth Reddy + Signed-off-by: Martin K. 
Petersen + +and this fix was lost when I mis-merged the conflict with commit +9df650963bf6 ("scsi: mpt3sas: Don't change DMA mask while reallocating +pools"). + +Reported-by: Juergen Gross +Fixes: 62e6e5940c0c ("Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi") +Link: https://lore.kernel.org/all/CAHk-=wjaK-TxrNaGtFDpL9qNHL1MVkWXO1TT6vObD5tXMSC4Zg@mail.gmail.com +Signed-off-by: Linus Torvalds +Acked-by: Martin Wilck +--- + drivers/scsi/mpt3sas/mpt3sas_base.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -2993,7 +2993,7 @@ _base_config_dma_addressing(struct MPT3S + u64 coherent_dma_mask, dma_mask; + + if (ioc->is_mcpu_endpoint || sizeof(dma_addr_t) == 4 || +- dma_get_required_mask(&pdev->dev) <= 32) { ++ dma_get_required_mask(&pdev->dev) <= DMA_BIT_MASK(32)) { + ioc->dma_mask = 32; + coherent_dma_mask = dma_mask = DMA_BIT_MASK(32); + /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */ diff --git a/patches.suse/scsi-qedf-Populate-sysfs-attributes-for-vport.patch b/patches.suse/scsi-qedf-Populate-sysfs-attributes-for-vport.patch index c64d269..eaf4442 100644 --- a/patches.suse/scsi-qedf-Populate-sysfs-attributes-for-vport.patch +++ b/patches.suse/scsi-qedf-Populate-sysfs-attributes-for-vport.patch @@ -50,7 +50,3 @@ Acked-by: Thomas Bogendoerfer QEDF_INFO(&(base_qedf->dbg_ctx), QEDF_LOG_NPIV, "vn_port=%p.\n", vn_port); -<<<<<<< HEAD -======= - ->>>>>>> origin/SLE15-SP4 diff --git a/patches.suse/scsi-sd-add-concurrent-positioning-ranges-support.patch b/patches.suse/scsi-sd-add-concurrent-positioning-ranges-support.patch new file mode 100644 index 0000000..b415774 --- /dev/null +++ b/patches.suse/scsi-sd-add-concurrent-positioning-ranges-support.patch @@ -0,0 +1,147 @@ +From: Damien Le Moal +Date: Wed, 27 Oct 2021 11:22:20 +0900 +Subject: [PATCH] scsi: sd: add concurrent positioning ranges support +Git-commit: e815d36548f01797ce381be8f0b74f4ba9befd15 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Add the sd_read_cpr() function to the sd scsi disk driver to discover +if a device has multiple concurrent positioning ranges (i.e. multiple +actuators on an HDD). The existence of VPD page B9h indicates if a +device has multiple concurrent positioning ranges. The page content +describes each range supported by the device. + +sd_read_cpr() is called from sd_revalidate_disk() and uses the block +layer functions disk_alloc_independent_access_ranges() and +disk_set_independent_access_ranges() to represent the set of actuators +of the device as independent access ranges. + +The format of the Concurrent Positioning Ranges VPD page B9h is defined +in section 6.6.6 of SBC-5. + +Signed-off-by: Damien Le Moal +Reviewed-by: Hannes Reinecke +Reviewed-by: Martin K. 
Petersen +Reviewed-by: Keith Busch +Link: https://lore.kernel.org/r/20211027022223.183838-3-damien.lemoal@wdc.com +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/scsi/sd.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++ + drivers/scsi/sd.h | 1 + + 2 files changed, 82 insertions(+) + +diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c +index d8f6add416c0..55c0d951a446 100644 +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -3088,6 +3088,86 @@ static void sd_read_security(struct scsi_disk *sdkp, unsigned char *buffer) + sdkp->security = 1; + } + ++static inline sector_t sd64_to_sectors(struct scsi_disk *sdkp, u8 *buf) ++{ ++ return logical_to_sectors(sdkp->device, get_unaligned_be64(buf)); ++} ++ ++/** ++ * sd_read_cpr - Query concurrent positioning ranges ++ * @sdkp: disk to query ++ */ ++static void sd_read_cpr(struct scsi_disk *sdkp) ++{ ++ struct blk_independent_access_ranges *iars = NULL; ++ unsigned char *buffer = NULL; ++ unsigned int nr_cpr = 0; ++ int i, vpd_len, buf_len = SD_BUF_SIZE; ++ u8 *desc; ++ ++ /* ++ * We need to have the capacity set first for the block layer to be ++ * able to check the ranges. ++ */ ++ if (sdkp->first_scan) ++ return; ++ ++ if (!sdkp->capacity) ++ goto out; ++ ++ /* ++ * Concurrent Positioning Ranges VPD: there can be at most 256 ranges, ++ * leading to a maximum page size of 64 + 256*32 bytes. ++ */ ++ buf_len = 64 + 256*32; ++ buffer = kmalloc(buf_len, GFP_KERNEL); ++ if (!buffer || scsi_get_vpd_page(sdkp->device, 0xb9, buffer, buf_len)) ++ goto out; ++ ++ /* We must have at least a 64B header and one 32B range descriptor */ ++ vpd_len = get_unaligned_be16(&buffer[2]) + 3; ++ if (vpd_len > buf_len || vpd_len < 64 + 32 || (vpd_len & 31)) { ++ sd_printk(KERN_ERR, sdkp, ++ "Invalid Concurrent Positioning Ranges VPD page\n"); ++ goto out; ++ } ++ ++ nr_cpr = (vpd_len - 64) / 32; ++ if (nr_cpr == 1) { ++ nr_cpr = 0; ++ goto out; ++ } ++ ++ iars = disk_alloc_independent_access_ranges(sdkp->disk, nr_cpr); ++ if (!iars) { ++ nr_cpr = 0; ++ goto out; ++ } ++ ++ desc = &buffer[64]; ++ for (i = 0; i < nr_cpr; i++, desc += 32) { ++ if (desc[0] != i) { ++ sd_printk(KERN_ERR, sdkp, ++ "Invalid Concurrent Positioning Range number\n"); ++ nr_cpr = 0; ++ break; ++ } ++ ++ iars->ia_range[i].sector = sd64_to_sectors(sdkp, desc + 8); ++ iars->ia_range[i].nr_sectors = sd64_to_sectors(sdkp, desc + 16); ++ } ++ ++out: ++ disk_set_independent_access_ranges(sdkp->disk, iars); ++ if (nr_cpr && sdkp->nr_actuators != nr_cpr) { ++ sd_printk(KERN_NOTICE, sdkp, ++ "%u concurrent positioning ranges\n", nr_cpr); ++ sdkp->nr_actuators = nr_cpr; ++ } ++ ++ kfree(buffer); ++} ++ + /* + * Determine the device's preferred I/O size for reads and writes + * unless the reported value is unreasonably small, large, not a +@@ -3203,6 +3283,7 @@ static int sd_revalidate_disk(struct gendisk *disk) + sd_read_app_tag_own(sdkp, buffer); + sd_read_write_same(sdkp, buffer); + sd_read_security(sdkp, buffer); ++ sd_read_cpr(sdkp); + } + + /* +diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h +index b59136c4125b..2e5932bde43d 100644 +--- a/drivers/scsi/sd.h ++++ b/drivers/scsi/sd.h +@@ -106,6 +106,7 @@ struct scsi_disk { + u8 protection_type;/* Data Integrity Field */ + u8 provisioning_mode; + u8 zeroing_mode; ++ u8 nr_actuators; /* Number of actuators */ + unsigned ATO : 1; /* state of disk ATO bit */ + unsigned cache_override : 1; /* temp override of WCE,RCD */ + unsigned WCE : 1; /* state of disk WCE bit */ +-- +2.35.3 + diff --git 
a/patches.suse/scsi-smartpqi-Add-PCI-ID-for-Adaptec-SmartHBA-2100-8.patch b/patches.suse/scsi-smartpqi-Add-PCI-ID-for-Adaptec-SmartHBA-2100-8.patch new file mode 100644 index 0000000..699c400 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Add-PCI-ID-for-Adaptec-SmartHBA-2100-8.patch @@ -0,0 +1,37 @@ +From: Mike McGowen +Date: Fri, 8 Jul 2022 13:47:20 -0500 +Subject: scsi: smartpqi: Add PCI ID for Adaptec SmartHBA 2100-8i +Patch-mainline: v6.0-rc1 +Git-commit: 44e68c4af5d2ce622527e0be28207956394891e2 +References: jsc#PED-1557 + +Add the PCI ID for (values in hex): + VID / DID / SVID / SDID + ---- ---- ---- ---- +Adaptec SmartHBA 2100-8i-o 9005 / 0285 / 9005 / 0659 + +Link: https://lore.kernel.org/r/165730604089.177165.17257514581321583667.stgit@brunhilda +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Reviewed-by: Kevin Barnett +Signed-off-by: Mike McGowen +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -9354,6 +9354,10 @@ static const struct pci_device_id pqi_pc + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, ++ PCI_VENDOR_ID_ADAPTEC2, 0x0659) ++ }, ++ { ++ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + PCI_VENDOR_ID_ADAPTEC2, 0x0800) + }, + { diff --git a/patches.suse/scsi-smartpqi-Add-PCI-IDs-for-Lenovo-controllers.patch b/patches.suse/scsi-smartpqi-Add-PCI-IDs-for-Lenovo-controllers.patch new file mode 100644 index 0000000..d772505 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Add-PCI-IDs-for-Lenovo-controllers.patch @@ -0,0 +1,73 @@ +From: Mike McGowen +Date: Fri, 8 Jul 2022 13:47:26 -0500 +Subject: scsi: smartpqi: Add PCI IDs for Lenovo controllers +Patch-mainline: v6.0-rc1 +Git-commit: 2a9c2ba2bc47d2df8791a1e32788c76cafa5584c +References: jsc#PED-1557 + +Add PCI IDs for Lenovo controllers (values in hex): + + VID / DID / SVID / SDID + ---- ---- ---- ---- +Lenovo 4350-8i HBA 9005 / 028f / 1d49 / 0220 +Lenovo 4350-16i HBA 9005 / 028f / 1d49 / 0221 +Lenovo 5350-8i RAID 9005 / 028f / 1d49 / 0520 +Lenovo 5350-8i Internal RAID 9005 / 028f / 1d49 / 0522 +Lenovo 9350-8i RAID 9005 / 028f / 1d49 / 0620 +Lenovo 9350-8i Internal RAID 9005 / 028f / 1d49 / 0621 +Lenovo 9350-16i RAID 9005 / 028f / 1d49 / 0622 +Lenovo 9350-16i Internal RAID 9005 / 028f / 1d49 / 0623 + +Link: https://lore.kernel.org/r/165730604598.177165.9910276232981721083.stgit@brunhilda +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Reviewed-by: Kevin Barnett +Signed-off-by: Mike McGowen +Signed-off-by: Don Brace +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -9830,6 +9830,38 @@ static const struct pci_device_id pqi_pc + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, ++ PCI_VENDOR_ID_LENOVO, 0x0220) ++ }, ++ { ++ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, ++ PCI_VENDOR_ID_LENOVO, 0x0221) ++ }, ++ { ++ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, ++ PCI_VENDOR_ID_LENOVO, 0x0520) ++ }, ++ { ++ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, ++ PCI_VENDOR_ID_LENOVO, 0x0522) ++ }, ++ { ++ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, ++ PCI_VENDOR_ID_LENOVO, 0x0620) ++ }, ++ { ++ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, ++ PCI_VENDOR_ID_LENOVO, 0x0621) ++ }, ++ { ++ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, ++ PCI_VENDOR_ID_LENOVO, 0x0622) ++ }, ++ { ++ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, ++ PCI_VENDOR_ID_LENOVO, 0x0623) ++ }, ++ { ++ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + PCI_ANY_ID, PCI_ANY_ID) + }, + { 0 } diff --git a/patches.suse/scsi-smartpqi-Add-PCI-IDs-for-ramaxel-controllers.patch b/patches.suse/scsi-smartpqi-Add-PCI-IDs-for-ramaxel-controllers.patch new file mode 100644 index 0000000..8a60a9c --- /dev/null +++ b/patches.suse/scsi-smartpqi-Add-PCI-IDs-for-ramaxel-controllers.patch @@ -0,0 +1,44 @@ +From: Murthy Bhat +Date: Fri, 8 Jul 2022 13:47:00 -0500 +Subject: scsi: smartpqi: Add PCI IDs for ramaxel controllers +Patch-mainline: v6.0-rc1 +Git-commit: dab5378485f601174a297a069d040ffb92918bf5 +References: jsc#PED-1557 + +Add the following controllers (values in hex): + + VID / DID / SVID / SDID + ---- / ---- / ---- / ---- +Ramaxel FBGF-RAD PM8204 9005 / 028F / 1CC4 / 0101 +Ramaxel FBGF-RAD PM8222 9005 / 028F / 1CC4 / 0201 + +Link: https://lore.kernel.org/r/165730602045.177165.3720208650043407285.stgit@brunhilda +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Reviewed-by: Mike McGowen +Reviewed-by: Kevin Barnett +Signed-off-by: Murthy Bhat +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -9748,6 +9748,14 @@ static const struct pci_device_id pqi_pc + }, + { + PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, ++ 0x1cc4, 0x0101) ++ }, ++ { ++ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, ++ 0x1cc4, 0x0201) ++ }, ++ { ++ PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f, + PCI_ANY_ID, PCI_ANY_ID) + }, + { 0 } diff --git a/patches.suse/scsi-smartpqi-Add-controller-fw-version-to-console-l.patch b/patches.suse/scsi-smartpqi-Add-controller-fw-version-to-console-l.patch new file mode 100644 index 0000000..5568bd5 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Add-controller-fw-version-to-console-l.patch @@ -0,0 +1,35 @@ +From: Gilbert Wu +Date: Fri, 8 Jul 2022 13:46:55 -0500 +Subject: scsi: smartpqi: Add controller fw version to console log +Patch-mainline: v6.0-rc1 +Git-commit: 1d393227fc76cf0887077d6752af0bb2288e5802 +References: jsc#PED-1557 + +Print controller firmware version to OS message log during driver +initialization or after OFA. 
+ +Link: https://lore.kernel.org/r/165730601536.177165.17698744242908911822.stgit@brunhilda +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Reviewed-by: Mike McGowen +Reviewed-by: Kevin Barnett +Signed-off-by: Gilbert Wu +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -7469,6 +7469,9 @@ static int pqi_get_ctrl_product_details( + sizeof(identify->vendor_id)); + ctrl_info->vendor[sizeof(identify->vendor_id)] = '\0'; + ++ dev_info(&ctrl_info->pci_dev->dev, ++ "Firmware version: %s\n", ctrl_info->firmware_version); ++ + out: + kfree(identify); + diff --git a/patches.suse/scsi-smartpqi-Add-ctrl-ready-timeout-module-paramete.patch b/patches.suse/scsi-smartpqi-Add-ctrl-ready-timeout-module-paramete.patch new file mode 100644 index 0000000..0ce3406 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Add-ctrl-ready-timeout-module-paramete.patch @@ -0,0 +1,110 @@ +From: Kevin Barnett +Date: Fri, 8 Jul 2022 13:47:56 -0500 +Subject: scsi: smartpqi: Add ctrl ready timeout module parameter +Patch-mainline: v6.0-rc1 +Git-commit: 6d567dfee0b7b4c66fb1f62d59a2e62e2709b453 +References: jsc#PED-1557 + +Allow user to override the default driver timeout for controller ready. + +There are some rare configurations which require the driver to wait longer +than the normal 3 minutes for the controller to complete its bootup +sequence and be ready to accept commands from the driver. + +The module parameter is: + +ctrl_ready_timeout= { 0 | 30-1800 } + +and specifies the timeout in seconds for the driver to wait for controller +ready. The valid range is 0 or 30-1800. The default value is 0, which +causes the driver to use a timeout of 180 seconds (3 minutes). + +Link: https://lore.kernel.org/r/165730607666.177165.9221211345284471213.stgit@brunhilda +Reviewed-by: Scott Teel +Reviewed-by: Mike McGowen +Signed-off-by: Kevin Barnett +Signed-off-by: Don Brace +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 28 ++++++++++++++++++++++++++++ + drivers/scsi/smartpqi/smartpqi_sis.c | 4 +++- + drivers/scsi/smartpqi/smartpqi_sis.h | 2 ++ + 3 files changed, 33 insertions(+), 1 deletion(-) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -181,6 +181,12 @@ module_param_named(disable_managed_inter + MODULE_PARM_DESC(disable_managed_interrupts, + "Disable the kernel automatically assigning SMP affinity to IRQs."); + ++static unsigned int pqi_ctrl_ready_timeout_secs; ++module_param_named(ctrl_ready_timeout, ++ pqi_ctrl_ready_timeout_secs, uint, 0644); ++MODULE_PARM_DESC(ctrl_ready_timeout, ++ "Timeout in seconds for driver to wait for controller ready."); ++ + static char *raid_levels[] = { + "RAID-0", + "RAID-4", +@@ -9089,9 +9095,31 @@ static void pqi_process_lockup_action_pa + DRIVER_NAME_SHORT, pqi_lockup_action_param); + } + ++#define PQI_CTRL_READY_TIMEOUT_PARAM_MIN_SECS 30 ++#define PQI_CTRL_READY_TIMEOUT_PARAM_MAX_SECS (30 * 60) ++ ++static void pqi_process_ctrl_ready_timeout_param(void) ++{ ++ if (pqi_ctrl_ready_timeout_secs == 0) ++ return; ++ ++ if (pqi_ctrl_ready_timeout_secs < PQI_CTRL_READY_TIMEOUT_PARAM_MIN_SECS) { ++ pr_warn("%s: ctrl_ready_timeout parm of %u second(s) is less than minimum timeout of %d seconds - setting timeout to %d seconds\n", ++ DRIVER_NAME_SHORT, pqi_ctrl_ready_timeout_secs, PQI_CTRL_READY_TIMEOUT_PARAM_MIN_SECS, PQI_CTRL_READY_TIMEOUT_PARAM_MIN_SECS); ++ pqi_ctrl_ready_timeout_secs = PQI_CTRL_READY_TIMEOUT_PARAM_MIN_SECS; ++ } else if (pqi_ctrl_ready_timeout_secs > PQI_CTRL_READY_TIMEOUT_PARAM_MAX_SECS) { ++ pr_warn("%s: ctrl_ready_timeout parm of %u seconds is greater than maximum timeout of %d seconds - setting timeout to %d seconds\n", ++ DRIVER_NAME_SHORT, pqi_ctrl_ready_timeout_secs, PQI_CTRL_READY_TIMEOUT_PARAM_MAX_SECS, PQI_CTRL_READY_TIMEOUT_PARAM_MAX_SECS); ++ pqi_ctrl_ready_timeout_secs = PQI_CTRL_READY_TIMEOUT_PARAM_MAX_SECS; ++ } ++ ++ sis_ctrl_ready_timeout_secs = pqi_ctrl_ready_timeout_secs; ++} ++ + static void pqi_process_module_params(void) + { + pqi_process_lockup_action_param(); ++ pqi_process_ctrl_ready_timeout_param(); + } + + #if defined(CONFIG_PM) +--- a/drivers/scsi/smartpqi/smartpqi_sis.c ++++ b/drivers/scsi/smartpqi/smartpqi_sis.c +@@ -86,6 +86,8 @@ struct sis_base_struct { + + #pragma pack() + ++unsigned int sis_ctrl_ready_timeout_secs = SIS_CTRL_READY_TIMEOUT_SECS; ++ + static int sis_wait_for_ctrl_ready_with_timeout(struct pqi_ctrl_info *ctrl_info, + unsigned int timeout_secs) + { +@@ -122,7 +124,7 @@ static int sis_wait_for_ctrl_ready_with_ + int sis_wait_for_ctrl_ready(struct pqi_ctrl_info *ctrl_info) + { + return sis_wait_for_ctrl_ready_with_timeout(ctrl_info, +- SIS_CTRL_READY_TIMEOUT_SECS); ++ sis_ctrl_ready_timeout_secs); + } + + int sis_wait_for_ctrl_ready_resume(struct pqi_ctrl_info *ctrl_info) +--- a/drivers/scsi/smartpqi/smartpqi_sis.h ++++ b/drivers/scsi/smartpqi/smartpqi_sis.h +@@ -32,4 +32,6 @@ void sis_soft_reset(struct pqi_ctrl_info + u32 sis_get_product_id(struct pqi_ctrl_info *ctrl_info); + int sis_wait_for_fw_triage_completion(struct pqi_ctrl_info *ctrl_info); + ++extern unsigned int sis_ctrl_ready_timeout_secs; ++ + #endif /* _SMARTPQI_SIS_H */ diff --git a/patches.suse/scsi-smartpqi-Add-driver-support-for-multi-LUN-devic.patch b/patches.suse/scsi-smartpqi-Add-driver-support-for-multi-LUN-devic.patch new file mode 100644 index 0000000..78482de --- /dev/null +++ 
b/patches.suse/scsi-smartpqi-Add-driver-support-for-multi-LUN-devic.patch @@ -0,0 +1,420 @@ +From: Kumar Meiyappan +Date: Fri, 8 Jul 2022 13:47:10 -0500 +Subject: scsi: smartpqi: Add driver support for multi-LUN devices +Patch-mainline: v6.0-rc1 +Git-commit: 904f2bfda65e051906e79030b7cbfa2f5db3e5f4 +References: jsc#PED-1557 + +Add driver support for up to 256 LUNs per device. + +Update AIO path to pass the appropriate LUN number for base-code to target +the correct LUN. + +Update RAID IO path to pass the appropriate LUN number for FW to target the +correct LUN. + +Pass the correct LUN number while doing a LUN reset. + +Count the outstanding commands based on LUN number. While removing a +Multi-LUN device, wait for all outstanding commands to complete for all +LUNs. + +Add Feature bit support. + +Link: https://lore.kernel.org/r/165730603067.177165.14016422176841798336.stgit@brunhilda +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Reviewed-by: Mike McGowen +Reviewed-by: Kevin Barnett +Signed-off-by: Kumar Meiyappan +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi.h | 15 ++++- + drivers/scsi/smartpqi/smartpqi_init.c | 92 ++++++++++++++++++++++------------ + 2 files changed, 72 insertions(+), 35 deletions(-) + +--- a/drivers/scsi/smartpqi/smartpqi.h ++++ b/drivers/scsi/smartpqi/smartpqi.h +@@ -293,7 +293,8 @@ struct pqi_raid_path_request { + u8 additional_cdb_bytes_usage : 3; + u8 reserved5 : 3; + u8 cdb[16]; +- u8 reserved6[12]; ++ u8 reserved6[11]; ++ u8 ml_device_lun_number; + __le32 timeout; + struct pqi_sg_descriptor sg_descriptors[PQI_MAX_EMBEDDED_SG_DESCRIPTORS]; + }; +@@ -467,7 +468,8 @@ struct pqi_task_management_request { + struct pqi_iu_header header; + __le16 request_id; + __le16 nexus_id; +- u8 reserved[2]; ++ u8 reserved; ++ u8 ml_device_lun_number; + __le16 timeout; + u8 lun_number[8]; + __le16 protocol_specific; +@@ -864,7 +866,8 @@ struct pqi_config_table_firmware_feature + #define PQI_FIRMWARE_FEATURE_UNIQUE_WWID_IN_REPORT_PHYS_LUN 16 + #define PQI_FIRMWARE_FEATURE_FW_TRIAGE 17 + #define PQI_FIRMWARE_FEATURE_RPL_EXTENDED_FORMAT_4_5 18 +-#define PQI_FIRMWARE_FEATURE_MAXIMUM 18 ++#define PQI_FIRMWARE_FEATURE_MULTI_LUN_DEVICE_SUPPORT 21 ++#define PQI_FIRMWARE_FEATURE_MAXIMUM 21 + + struct pqi_config_table_debug { + struct pqi_config_table_section_header header; +@@ -1082,6 +1085,8 @@ struct pqi_stream_data { + u32 last_accessed; + }; + ++#define PQI_MAX_LUNS_PER_DEVICE 256 ++ + struct pqi_scsi_dev { + int devtype; /* as reported by INQUIRY command */ + u8 device_type; /* as reported by */ +@@ -1125,6 +1130,7 @@ struct pqi_scsi_dev { + u8 phy_id; + u8 ncq_prio_enable; + u8 ncq_prio_support; ++ u8 multi_lun_device_lun_count; + bool raid_bypass_configured; /* RAID bypass configured */ + bool raid_bypass_enabled; /* RAID bypass enabled */ + u32 next_bypass_group[RAID_MAP_MAX_DATA_DISKS_PER_ROW]; +@@ -1140,7 +1146,7 @@ struct pqi_scsi_dev { + struct list_head delete_list_entry; + + struct pqi_stream_data stream_data[NUM_STREAMS_PER_LUN]; +- atomic_t scsi_cmds_outstanding; ++ atomic_t scsi_cmds_outstanding[PQI_MAX_LUNS_PER_DEVICE]; + atomic_t raid_bypass_cnt; + }; + +@@ -1333,6 +1339,7 @@ struct pqi_ctrl_info { + u8 tmf_iu_timeout_supported : 1; + u8 firmware_triage_supported : 1; + u8 rpl_extended_format_4_5_supported : 1; ++ u8 multi_lun_device_supported : 1; + u8 enable_r1_writes : 1; + u8 enable_r5_writes : 1; + u8 enable_r6_writes : 1; +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ 
b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -94,7 +94,7 @@ static void pqi_ofa_setup_host_buffer(st + static void pqi_ofa_free_host_buffer(struct pqi_ctrl_info *ctrl_info); + static int pqi_ofa_host_memory_update(struct pqi_ctrl_info *ctrl_info); + static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info, +- struct pqi_scsi_dev *device, unsigned long timeout_msecs); ++ struct pqi_scsi_dev *device, u8 lun, unsigned long timeout_msecs); + + /* for flags argument to pqi_submit_raid_request_synchronous() */ + #define PQI_SYNC_FLAGS_INTERRUPTABLE 0x1 +@@ -1597,7 +1597,9 @@ static int pqi_get_physical_device_info( + &id_phys->alternate_paths_phys_connector, + sizeof(device->phys_connector)); + device->bay = id_phys->phys_bay_in_box; +- ++ device->multi_lun_device_lun_count = id_phys->multi_lun_device_lun_count; ++ if (!device->multi_lun_device_lun_count) ++ device->multi_lun_device_lun_count = 1; + if ((id_phys->even_more_flags & PQI_DEVICE_PHY_MAP_SUPPORTED) && + id_phys->phy_count) + device->phy_id = +@@ -1880,15 +1882,18 @@ static int pqi_add_device(struct pqi_ctr + static inline void pqi_remove_device(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device) + { + int rc; ++ int lun; + +- rc = pqi_device_wait_for_pending_io(ctrl_info, device, +- PQI_REMOVE_DEVICE_PENDING_IO_TIMEOUT_MSECS); +- if (rc) +- dev_err(&ctrl_info->pci_dev->dev, +- "scsi %d:%d:%d:%d removing device with %d outstanding command(s)\n", +- ctrl_info->scsi_host->host_no, device->bus, +- device->target, device->lun, +- atomic_read(&device->scsi_cmds_outstanding)); ++ for (lun = 0; lun < device->multi_lun_device_lun_count; lun++) { ++ rc = pqi_device_wait_for_pending_io(ctrl_info, device, lun, ++ PQI_REMOVE_DEVICE_PENDING_IO_TIMEOUT_MSECS); ++ if (rc) ++ dev_err(&ctrl_info->pci_dev->dev, ++ "scsi %d:%d:%d:%d removing device with %d outstanding command(s)\n", ++ ctrl_info->scsi_host->host_no, device->bus, ++ device->target, lun, ++ atomic_read(&device->scsi_cmds_outstanding[lun])); ++ } + + if (pqi_is_logical_device(device)) + scsi_remove_device(device->sdev); +@@ -2061,6 +2066,9 @@ static void pqi_scsi_update_device(struc + existing_device->box_index = new_device->box_index; + existing_device->phys_box_on_bus = new_device->phys_box_on_bus; + existing_device->phy_connected_dev_type = new_device->phy_connected_dev_type; ++ existing_device->multi_lun_device_lun_count = new_device->multi_lun_device_lun_count; ++ if (!existing_device->multi_lun_device_lun_count) ++ existing_device->multi_lun_device_lun_count = 1; + memcpy(existing_device->box, new_device->box, + sizeof(existing_device->box)); + memcpy(existing_device->phys_connector, new_device->phys_connector, +@@ -5463,6 +5471,7 @@ static int pqi_raid_submit_scsi_cmd_with + put_unaligned_le16(io_request->index, &request->request_id); + request->error_index = request->request_id; + memcpy(request->lun_number, device->scsi3addr, sizeof(request->lun_number)); ++ request->ml_device_lun_number = (u8)scmd->device->lun; + + cdb_length = min_t(size_t, scmd->cmd_len, sizeof(request->cdb)); + memcpy(request->cdb, scmd->cmnd, cdb_length); +@@ -5627,7 +5636,9 @@ static int pqi_aio_submit_io(struct pqi_ + int rc; + struct pqi_io_request *io_request; + struct pqi_aio_path_request *request; ++ struct pqi_scsi_dev *device; + ++ device = scmd->device->hostdata; + io_request = pqi_alloc_io_request(ctrl_info); + io_request->io_complete_callback = pqi_aio_io_complete; + io_request->scmd = scmd; +@@ -5643,6 +5654,8 @@ static int pqi_aio_submit_io(struct pqi_ + 
request->command_priority = io_high_prio; + put_unaligned_le16(io_request->index, &request->request_id); + request->error_index = request->request_id; ++ if (!pqi_is_logical_device(device) && ctrl_info->multi_lun_device_supported) ++ put_unaligned_le64(((scmd->device->lun) << 8), &request->lun_number); + if (cdb_length > sizeof(request->cdb)) + cdb_length = sizeof(request->cdb); + request->cdb_length = cdb_length; +@@ -5852,7 +5865,7 @@ void pqi_prep_for_scsi_done(struct scsi_ + return; + } + +- atomic_dec(&device->scsi_cmds_outstanding); ++ atomic_dec(&device->scsi_cmds_outstanding[scmd->device->lun]); + } + + static bool pqi_is_parity_write_stream(struct pqi_ctrl_info *ctrl_info, +@@ -5947,7 +5960,7 @@ static int pqi_scsi_queue_command(struct + return 0; + } + +- atomic_inc(&device->scsi_cmds_outstanding); ++ atomic_inc(&device->scsi_cmds_outstanding[scmd->device->lun]); + + ctrl_info = shost_to_hba(shost); + +@@ -5993,7 +6006,7 @@ static int pqi_scsi_queue_command(struct + + out: + if (rc) +- atomic_dec(&device->scsi_cmds_outstanding); ++ atomic_dec(&device->scsi_cmds_outstanding[scmd->device->lun]); + + return rc; + } +@@ -6133,7 +6146,7 @@ static void pqi_fail_io_queued_for_devic + #define PQI_PENDING_IO_WARNING_TIMEOUT_SECS 10 + + static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info, +- struct pqi_scsi_dev *device, unsigned long timeout_msecs) ++ struct pqi_scsi_dev *device, u8 lun, unsigned long timeout_msecs) + { + int cmds_outstanding; + unsigned long start_jiffies; +@@ -6143,7 +6156,7 @@ static int pqi_device_wait_for_pending_i + start_jiffies = jiffies; + warning_timeout = (PQI_PENDING_IO_WARNING_TIMEOUT_SECS * HZ) + start_jiffies; + +- while ((cmds_outstanding = atomic_read(&device->scsi_cmds_outstanding)) > 0) { ++ while ((cmds_outstanding = atomic_read(&device->scsi_cmds_outstanding[lun])) > 0) { + pqi_check_ctrl_health(ctrl_info); + if (pqi_ctrl_offline(ctrl_info)) + return -ENXIO; +@@ -6152,14 +6165,14 @@ static int pqi_device_wait_for_pending_i + dev_err(&ctrl_info->pci_dev->dev, + "scsi %d:%d:%d:%d: timed out after %lu seconds waiting for %d outstanding command(s)\n", + ctrl_info->scsi_host->host_no, device->bus, device->target, +- device->lun, msecs_waiting / 1000, cmds_outstanding); ++ lun, msecs_waiting / 1000, cmds_outstanding); + return -ETIMEDOUT; + } + if (time_after(jiffies, warning_timeout)) { + dev_warn(&ctrl_info->pci_dev->dev, + "scsi %d:%d:%d:%d: waiting %lu seconds for %d outstanding command(s)\n", + ctrl_info->scsi_host->host_no, device->bus, device->target, +- device->lun, msecs_waiting / 1000, cmds_outstanding); ++ lun, msecs_waiting / 1000, cmds_outstanding); + warning_timeout = (PQI_PENDING_IO_WARNING_TIMEOUT_SECS * HZ) + jiffies; + } + usleep_range(1000, 2000); +@@ -6179,7 +6192,7 @@ static void pqi_lun_reset_complete(struc + #define PQI_LUN_RESET_POLL_COMPLETION_SECS 10 + + static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info, +- struct pqi_scsi_dev *device, struct completion *wait) ++ struct pqi_scsi_dev *device, u8 lun, struct completion *wait) + { + int rc; + unsigned int wait_secs; +@@ -6201,10 +6214,10 @@ static int pqi_wait_for_lun_reset_comple + } + + wait_secs += PQI_LUN_RESET_POLL_COMPLETION_SECS; +- cmds_outstanding = atomic_read(&device->scsi_cmds_outstanding); ++ cmds_outstanding = atomic_read(&device->scsi_cmds_outstanding[lun]); + dev_warn(&ctrl_info->pci_dev->dev, + "scsi %d:%d:%d:%d: waiting %u seconds for LUN reset to complete (%d command(s) outstanding)\n", +- 
ctrl_info->scsi_host->host_no, device->bus, device->target, device->lun, wait_secs, cmds_outstanding); ++ ctrl_info->scsi_host->host_no, device->bus, device->target, lun, wait_secs, cmds_outstanding); + } + + return rc; +@@ -6212,13 +6225,15 @@ static int pqi_wait_for_lun_reset_comple + + #define PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS 30 + +-static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device) ++static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd) + { + int rc; + struct pqi_io_request *io_request; + DECLARE_COMPLETION_ONSTACK(wait); + struct pqi_task_management_request *request; ++ struct pqi_scsi_dev *device; + ++ device = scmd->device->hostdata; + io_request = pqi_alloc_io_request(ctrl_info); + io_request->io_complete_callback = pqi_lun_reset_complete; + io_request->context = &wait; +@@ -6232,6 +6247,8 @@ static int pqi_lun_reset(struct pqi_ctrl + put_unaligned_le16(io_request->index, &request->request_id); + memcpy(request->lun_number, device->scsi3addr, + sizeof(request->lun_number)); ++ if (!pqi_is_logical_device(device) && ctrl_info->multi_lun_device_supported) ++ request->ml_device_lun_number = (u8)scmd->device->lun; + request->task_management_function = SOP_TASK_MANAGEMENT_LUN_RESET; + if (ctrl_info->tmf_iu_timeout_supported) + put_unaligned_le16(PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS, &request->timeout); +@@ -6239,7 +6256,7 @@ static int pqi_lun_reset(struct pqi_ctrl + pqi_start_io(ctrl_info, &ctrl_info->queue_groups[PQI_DEFAULT_QUEUE_GROUP], RAID_PATH, + io_request); + +- rc = pqi_wait_for_lun_reset_completion(ctrl_info, device, &wait); ++ rc = pqi_wait_for_lun_reset_completion(ctrl_info, device, (u8)scmd->device->lun, &wait); + if (rc == 0) + rc = io_request->status; + +@@ -6253,15 +6270,17 @@ static int pqi_lun_reset(struct pqi_ctrl + #define PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS (10 * 60 * 1000) + #define PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS (2 * 60 * 1000) + +-static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device) ++static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd) + { + int reset_rc; + int wait_rc; + unsigned int retries; + unsigned long timeout_msecs; ++ struct pqi_scsi_dev *device; + ++ device = scmd->device->hostdata; + for (retries = 0;;) { +- reset_rc = pqi_lun_reset(ctrl_info, device); ++ reset_rc = pqi_lun_reset(ctrl_info, scmd); + if (reset_rc == 0 || reset_rc == -ENODEV || ++retries > PQI_LUN_RESET_RETRIES) + break; + msleep(PQI_LUN_RESET_RETRY_INTERVAL_MSECS); +@@ -6270,18 +6289,19 @@ static int pqi_lun_reset_with_retries(st + timeout_msecs = reset_rc ? PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS : + PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS; + +- wait_rc = pqi_device_wait_for_pending_io(ctrl_info, device, timeout_msecs); ++ wait_rc = pqi_device_wait_for_pending_io(ctrl_info, device, scmd->device->lun, timeout_msecs); + if (wait_rc && reset_rc == 0) + reset_rc = wait_rc; + + return reset_rc == 0 ? 
SUCCESS : FAILED; + } + +-static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info, +- struct pqi_scsi_dev *device) ++static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd) + { + int rc; ++ struct pqi_scsi_dev *device; + ++ device = scmd->device->hostdata; + pqi_ctrl_block_requests(ctrl_info); + pqi_ctrl_wait_until_quiesced(ctrl_info); + pqi_fail_io_queued_for_device(ctrl_info, device); +@@ -6289,7 +6309,7 @@ static int pqi_device_reset(struct pqi_c + if (rc) + rc = FAILED; + else +- rc = pqi_lun_reset_with_retries(ctrl_info, device); ++ rc = pqi_lun_reset_with_retries(ctrl_info, scmd); + pqi_ctrl_unblock_requests(ctrl_info); + + return rc; +@@ -6311,18 +6331,18 @@ static int pqi_eh_device_reset_handler(s + dev_err(&ctrl_info->pci_dev->dev, + "resetting scsi %d:%d:%d:%d due to cmd 0x%02x\n", + shost->host_no, +- device->bus, device->target, device->lun, ++ device->bus, device->target, (u32)scmd->device->lun, + scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff); + + pqi_check_ctrl_health(ctrl_info); + if (pqi_ctrl_offline(ctrl_info)) + rc = FAILED; + else +- rc = pqi_device_reset(ctrl_info, device); ++ rc = pqi_device_reset(ctrl_info, scmd); + + dev_err(&ctrl_info->pci_dev->dev, + "reset of scsi %d:%d:%d:%d: %s\n", +- shost->host_no, device->bus, device->target, device->lun, ++ shost->host_no, device->bus, device->target, (u32)scmd->device->lun, + rc == SUCCESS ? "SUCCESS" : "FAILED"); + + mutex_unlock(&ctrl_info->lun_reset_mutex); +@@ -7296,6 +7316,7 @@ static int pqi_register_scsi(struct pqi_ + shost->this_id = -1; + shost->max_channel = PQI_MAX_BUS; + shost->max_cmd_len = MAX_COMMAND_SIZE; ++ shost->max_lun = PQI_MAX_LUNS_PER_DEVICE; + shost->max_lun = ~0; + shost->max_id = ~0; + shost->max_sectors = ctrl_info->max_sectors; +@@ -7643,6 +7664,9 @@ static void pqi_ctrl_update_feature_flag + case PQI_FIRMWARE_FEATURE_RPL_EXTENDED_FORMAT_4_5: + ctrl_info->rpl_extended_format_4_5_supported = firmware_feature->enabled; + break; ++ case PQI_FIRMWARE_FEATURE_MULTI_LUN_DEVICE_SUPPORT: ++ ctrl_info->multi_lun_device_supported = firmware_feature->enabled; ++ break; + } + + pqi_firmware_feature_status(ctrl_info, firmware_feature); +@@ -7743,6 +7767,11 @@ static struct pqi_firmware_feature pqi_f + .feature_bit = PQI_FIRMWARE_FEATURE_RPL_EXTENDED_FORMAT_4_5, + .feature_status = pqi_ctrl_update_feature_flags, + }, ++ { ++ .feature_name = "Multi-LUN Target", ++ .feature_bit = PQI_FIRMWARE_FEATURE_MULTI_LUN_DEVICE_SUPPORT, ++ .feature_status = pqi_ctrl_update_feature_flags, ++ }, + }; + + static void pqi_process_firmware_features( +@@ -7844,6 +7873,7 @@ static void pqi_ctrl_reset_config(struct + ctrl_info->tmf_iu_timeout_supported = false; + ctrl_info->firmware_triage_supported = false; + ctrl_info->rpl_extended_format_4_5_supported = false; ++ ctrl_info->multi_lun_device_supported = false; + } + + static int pqi_process_config_table(struct pqi_ctrl_info *ctrl_info) diff --git a/patches.suse/scsi-smartpqi-Avoid-drive-spin-down-during-suspend.patch b/patches.suse/scsi-smartpqi-Avoid-drive-spin-down-during-suspend.patch new file mode 100644 index 0000000..c62c6a3 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Avoid-drive-spin-down-during-suspend.patch @@ -0,0 +1,56 @@ +From: Sagar Biradar +Date: Tue, 1 Feb 2022 15:48:33 -0600 +Subject: scsi: smartpqi: Avoid drive spin-down during suspend +Patch-mainline: v5.18-rc1 +Git-commit: b73357a1fd39cec82b654421110e35e8167930d5 +References: jsc#PED-1557 + +On certain systems (based on PCI IDs), when the OS transitions the system +into the 
suspend (S3) state, the BMIC flush cache command will indicate a +system RESTART instead of SUSPEND. + +This avoids drive spin-down. + +Link: https://lore.kernel.org/r/164375211330.440833.7203813692110347698.stgit@brunhilda.pdev.net +Reviewed-by: Kevin Barnett +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Signed-off-by: Sagar Biradar +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -8976,10 +8976,19 @@ static void pqi_process_module_params(vo + pqi_process_lockup_action_param(); + } + ++static inline enum bmic_flush_cache_shutdown_event pqi_get_flush_cache_shutdown_event(struct pci_dev *pci_dev) ++{ ++ if (pci_dev->subsystem_vendor == PCI_VENDOR_ID_ADAPTEC2 && pci_dev->subsystem_device == 0x1304) ++ return RESTART; ++ return SUSPEND; ++} ++ + static __maybe_unused int pqi_suspend(struct pci_dev *pci_dev, pm_message_t state) + { + struct pqi_ctrl_info *ctrl_info; ++ enum bmic_flush_cache_shutdown_event shutdown_event; + ++ shutdown_event = pqi_get_flush_cache_shutdown_event(pci_dev); + ctrl_info = pci_get_drvdata(pci_dev); + + pqi_wait_until_ofa_finished(ctrl_info); +@@ -8989,7 +8998,7 @@ static __maybe_unused int pqi_suspend(st + pqi_ctrl_block_device_reset(ctrl_info); + pqi_ctrl_block_requests(ctrl_info); + pqi_ctrl_wait_until_quiesced(ctrl_info); +- pqi_flush_cache(ctrl_info, SUSPEND); ++ pqi_flush_cache(ctrl_info, shutdown_event); + pqi_stop_heartbeat_timer(ctrl_info); + + pqi_crash_if_pending_command(ctrl_info); diff --git a/patches.suse/scsi-smartpqi-Close-write-read-holes.patch b/patches.suse/scsi-smartpqi-Close-write-read-holes.patch new file mode 100644 index 0000000..b37e9e0 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Close-write-read-holes.patch @@ -0,0 +1,54 @@ +From: Mike McGowen +Date: Fri, 8 Jul 2022 13:47:05 -0500 +Subject: scsi: smartpqi: Close write read holes +Patch-mainline: v6.0-rc1 +Git-commit: 297bdc540f0e391568788f8ece3020653748a26f +References: jsc#PED-1557 + +Insert a minimum 1 millisecond delay after writing to a register before +reading from it. + +SIS and PQI registers that can be both written to and read from can return +stale data if read from too soon after having been written to. + +There is no read/write ordering or hazard detection on the inbound path to +the MSGU from the PCIe bus, therefore reads could pass writes. + +Link: https://lore.kernel.org/r/165730602555.177165.11181012469428348394.stgit@brunhilda +Reviewed-by: Scott Teel +Signed-off-by: Mike McGowen +Co-developed-by: Kevin Barnett +Signed-off-by: Kevin Barnett +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_sis.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/scsi/smartpqi/smartpqi_sis.c ++++ b/drivers/scsi/smartpqi/smartpqi_sis.c +@@ -194,6 +194,7 @@ static int sis_send_sync_cmd(struct pqi_ + + /* Disable doorbell interrupts by masking all interrupts. 
*/ + writel(~0, ®isters->sis_interrupt_mask); ++ usleep_range(1000, 2000); + + /* + * Force the completion of the interrupt mask register write before +@@ -383,6 +384,7 @@ static int sis_wait_for_doorbell_bit_to_ + static inline int sis_set_doorbell_bit(struct pqi_ctrl_info *ctrl_info, u32 bit) + { + writel(bit, &ctrl_info->registers->sis_host_to_ctrl_doorbell); ++ usleep_range(1000, 2000); + + return sis_wait_for_doorbell_bit_to_clear(ctrl_info, bit); + } +@@ -423,6 +425,7 @@ int sis_reenable_sis_mode(struct pqi_ctr + void sis_write_driver_scratch(struct pqi_ctrl_info *ctrl_info, u32 value) + { + writel(value, &ctrl_info->registers->sis_driver_scratch); ++ usleep_range(1000, 2000); + } + + u32 sis_read_driver_scratch(struct pqi_ctrl_info *ctrl_info) diff --git a/patches.suse/scsi-smartpqi-Eliminate-drive-spin-down-on-warm-boot.patch b/patches.suse/scsi-smartpqi-Eliminate-drive-spin-down-on-warm-boot.patch new file mode 100644 index 0000000..a4e7040 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Eliminate-drive-spin-down-on-warm-boot.patch @@ -0,0 +1,52 @@ +From: Sagar Biradar +Date: Tue, 1 Feb 2022 15:48:08 -0600 +Subject: scsi: smartpqi: Eliminate drive spin down on warm boot +Patch-mainline: v5.18-rc1 +Git-commit: 70ba20be4bb1f560bba7288bd12fbb918823e576 +References: jsc#PED-1557 + +Avoid drive spin down during a warm boot. + +Call the BMIC Flush Cache command (0xc2) to indicate the reason for the +flush cache (shutdown, hibernate, suspend, or restart). + +Link: https://lore.kernel.org/r/164375208810.440833.11254644025650871435.stgit@brunhilda.pdev.net +Reviewed-by: Mike McGowen +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Signed-off-by: Sagar Biradar +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -8902,6 +8902,7 @@ static void pqi_shutdown(struct pci_dev + { + int rc; + struct pqi_ctrl_info *ctrl_info; ++ enum bmic_flush_cache_shutdown_event shutdown_event; + + ctrl_info = pci_get_drvdata(pci_dev); + if (!ctrl_info) { +@@ -8917,11 +8918,16 @@ static void pqi_shutdown(struct pci_dev + pqi_ctrl_block_requests(ctrl_info); + pqi_ctrl_wait_until_quiesced(ctrl_info); + ++ if (system_state == SYSTEM_RESTART) ++ shutdown_event = RESTART; ++ else ++ shutdown_event = SHUTDOWN; ++ + /* + * Write all data in the controller's battery-backed cache to + * storage. + */ +- rc = pqi_flush_cache(ctrl_info, SHUTDOWN); ++ rc = pqi_flush_cache(ctrl_info, shutdown_event); + if (rc) + dev_err(&pci_dev->dev, + "unable to flush controller cache\n"); diff --git a/patches.suse/scsi-smartpqi-Enable-SATA-NCQ-priority-in-sysfs.patch b/patches.suse/scsi-smartpqi-Enable-SATA-NCQ-priority-in-sysfs.patch new file mode 100644 index 0000000..4c19e65 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Enable-SATA-NCQ-priority-in-sysfs.patch @@ -0,0 +1,231 @@ +From: Gilbert Wu +Date: Tue, 1 Feb 2022 15:48:03 -0600 +Subject: scsi: smartpqi: Enable SATA NCQ priority in sysfs +Patch-mainline: v5.18-rc1 +Git-commit: 2a47834d9452812f68c8894994e95adad56e4b60 +References: jsc#PED-1557 + +Add device attribute 'sas_ncq_prio_enable' to enable SATA NCQ priority +support and provide I/O priority in SCSI command and pass priority +information to controller firmware. 
+ +This device attribute works only when device has NCQ priority support and +controller firmware can handle I/Os with NCQ priority attribute. + +Link: https://lore.kernel.org/r/164375208306.440833.7392577382127815362.stgit@brunhilda.pdev.net +Reviewed-by: Mike McGowen +Reviewed-by: Scott Teel +Signed-off-by: Gilbert Wu +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi.h | 2 + drivers/scsi/smartpqi/smartpqi_init.c | 119 ++++++++++++++++++++++++++++++++-- + 2 files changed, 117 insertions(+), 4 deletions(-) + +--- a/drivers/scsi/smartpqi/smartpqi.h ++++ b/drivers/scsi/smartpqi/smartpqi.h +@@ -1127,6 +1127,8 @@ struct pqi_scsi_dev { + u8 box[8]; + u16 phys_connector[8]; + u8 phy_id; ++ u8 ncq_prio_enable; ++ u8 ncq_prio_support; + bool raid_bypass_configured; /* RAID bypass configured */ + bool raid_bypass_enabled; /* RAID bypass enabled */ + u32 next_bypass_group; +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -68,7 +68,7 @@ static int pqi_submit_raid_request_synch + static int pqi_aio_submit_io(struct pqi_ctrl_info *ctrl_info, + struct scsi_cmnd *scmd, u32 aio_handle, u8 *cdb, + unsigned int cdb_length, struct pqi_queue_group *queue_group, +- struct pqi_encryption_info *encryption_info, bool raid_bypass); ++ struct pqi_encryption_info *encryption_info, bool raid_bypass, bool io_high_prio); + static int pqi_aio_submit_r1_write_io(struct pqi_ctrl_info *ctrl_info, + struct scsi_cmnd *scmd, struct pqi_queue_group *queue_group, + struct pqi_encryption_info *encryption_info, struct pqi_scsi_dev *device, +@@ -1549,6 +1549,7 @@ no_buffer: + device->volume_offline = volume_offline; + } + ++#define PQI_DEVICE_NCQ_PRIO_SUPPORTED 0x01 + #define PQI_DEVICE_PHY_MAP_SUPPORTED 0x10 + + static int pqi_get_physical_device_info(struct pqi_ctrl_info *ctrl_info, +@@ -1597,6 +1598,10 @@ static int pqi_get_physical_device_info( + else + device->phy_id = 0xFF; + ++ device->ncq_prio_support = ++ ((get_unaligned_le32(&id_phys->misc_drive_flags) >> 16) & ++ PQI_DEVICE_NCQ_PRIO_SUPPORTED); ++ + return 0; + } + +@@ -3007,7 +3012,7 @@ static int pqi_raid_bypass_submit_scsi_c + + return pqi_aio_submit_io(ctrl_info, scmd, rmd.aio_handle, + rmd.cdb, rmd.cdb_length, queue_group, +- encryption_info_ptr, true); ++ encryption_info_ptr, true, false); + } + + #define PQI_STATUS_IDLE 0x0 +@@ -5560,18 +5565,55 @@ static void pqi_aio_io_complete(struct p + pqi_scsi_done(scmd); + } + ++static inline bool pqi_is_io_high_prioity(struct pqi_ctrl_info *ctrl_info, ++ struct pqi_scsi_dev *device, struct scsi_cmnd *scmd) ++{ ++ bool io_high_prio; ++ int priority_class; ++ ++ io_high_prio = false; ++ if (device->ncq_prio_enable) { ++ priority_class = ++ IOPRIO_PRIO_CLASS(req_get_ioprio(scsi_cmd_to_rq(scmd))); ++ if (priority_class == IOPRIO_CLASS_RT) { ++ /* set NCQ priority for read/write command */ ++ switch (scmd->cmnd[0]) { ++ case WRITE_16: ++ case READ_16: ++ case WRITE_12: ++ case READ_12: ++ case WRITE_10: ++ case READ_10: ++ case WRITE_6: ++ case READ_6: ++ io_high_prio = true; ++ break; ++ default: ++ break; ++ } ++ } ++ } ++ ++ return io_high_prio; ++} ++ + static inline int pqi_aio_submit_scsi_cmd(struct pqi_ctrl_info *ctrl_info, + struct pqi_scsi_dev *device, struct scsi_cmnd *scmd, + struct pqi_queue_group *queue_group) + { ++ bool io_high_prio; ++ ++ io_high_prio = pqi_is_io_high_prioity(ctrl_info, device, scmd); + return pqi_aio_submit_io(ctrl_info, scmd, device->aio_handle, +- scmd->cmnd, scmd->cmd_len, 
queue_group, NULL, false); ++ scmd->cmnd, scmd->cmd_len, queue_group, NULL, ++ false, io_high_prio); + } + + static int pqi_aio_submit_io(struct pqi_ctrl_info *ctrl_info, + struct scsi_cmnd *scmd, u32 aio_handle, u8 *cdb, + unsigned int cdb_length, struct pqi_queue_group *queue_group, +- struct pqi_encryption_info *encryption_info, bool raid_bypass) ++ struct pqi_encryption_info *encryption_info, bool raid_bypass, ++ bool io_high_prio) + { + int rc; + struct pqi_io_request *io_request; +@@ -5589,6 +5631,7 @@ static int pqi_aio_submit_io(struct pqi_ + put_unaligned_le32(aio_handle, &request->nexus_id); + put_unaligned_le32(scsi_bufflen(scmd), &request->buffer_length); + request->task_attribute = SOP_TASK_ATTRIBUTE_SIMPLE; ++ request->command_priority = io_high_prio; + put_unaligned_le16(io_request->index, &request->request_id); + request->error_index = request->request_id; + if (cdb_length > sizeof(request->cdb)) +@@ -7121,6 +7164,71 @@ static ssize_t pqi_raid_bypass_cnt_show( + return scnprintf(buffer, PAGE_SIZE, "0x%x\n", raid_bypass_cnt); + } + ++static ssize_t pqi_sas_ncq_prio_enable_show(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct pqi_ctrl_info *ctrl_info; ++ struct scsi_device *sdev; ++ struct pqi_scsi_dev *device; ++ unsigned long flags; ++ int output_len = 0; ++ ++ sdev = to_scsi_device(dev); ++ ctrl_info = shost_to_hba(sdev->host); ++ ++ spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); ++ ++ device = sdev->hostdata; ++ if (!device) { ++ spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); ++ return -ENODEV; ++ } ++ ++ output_len = snprintf(buf, PAGE_SIZE, "%d\n", ++ device->ncq_prio_enable); ++ spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); ++ ++ return output_len; ++} ++ ++static ssize_t pqi_sas_ncq_prio_enable_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ struct pqi_ctrl_info *ctrl_info; ++ struct scsi_device *sdev; ++ struct pqi_scsi_dev *device; ++ unsigned long flags; ++ u8 ncq_prio_enable = 0; ++ ++ if (kstrtou8(buf, 0, &ncq_prio_enable)) ++ return -EINVAL; ++ ++ sdev = to_scsi_device(dev); ++ ctrl_info = shost_to_hba(sdev->host); ++ ++ spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); ++ ++ device = sdev->hostdata; ++ ++ if (!device) { ++ spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); ++ return -ENODEV; ++ } ++ ++ if (!device->ncq_prio_support || ++ !device->is_physical_device) { ++ spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); ++ return -EINVAL; ++ } ++ ++ device->ncq_prio_enable = ncq_prio_enable; ++ ++ spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); ++ ++ return strlen(buf); ++} ++ + static DEVICE_ATTR(lunid, 0444, pqi_lunid_show, NULL); + static DEVICE_ATTR(unique_id, 0444, pqi_unique_id_show, NULL); + static DEVICE_ATTR(path_info, 0444, pqi_path_info_show, NULL); +@@ -7128,6 +7236,8 @@ static DEVICE_ATTR(sas_address, 0444, pq + static DEVICE_ATTR(ssd_smart_path_enabled, 0444, pqi_ssd_smart_path_enabled_show, NULL); + static DEVICE_ATTR(raid_level, 0444, pqi_raid_level_show, NULL); + static DEVICE_ATTR(raid_bypass_cnt, 0444, pqi_raid_bypass_cnt_show, NULL); ++static DEVICE_ATTR(sas_ncq_prio_enable, 0644, ++ pqi_sas_ncq_prio_enable_show, pqi_sas_ncq_prio_enable_store); + + static struct attribute *pqi_sdev_attrs[] = { + &dev_attr_lunid.attr, +@@ -7137,6 +7247,7 @@ static struct attribute *pqi_sdev_attrs[ + &dev_attr_ssd_smart_path_enabled.attr, + &dev_attr_raid_level.attr, 
+ 	&dev_attr_raid_bypass_cnt.attr,
++	&dev_attr_sas_ncq_prio_enable.attr,
+ 	NULL
+ };
+
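A usage sketch for the sas_ncq_prio_enable attribute added above (the device name and sysfs path are assumptions for illustration): enable it per drive with `echo 1 > /sys/block/sda/device/sas_ncq_prio_enable`, then issue I/O under the realtime I/O priority class, e.g. `ionice -c 1 dd if=/dev/sda of=/dev/null bs=64k count=1`, since pqi_is_io_high_prioity() only marks READ/WRITE CDBs whose request priority class is IOPRIO_CLASS_RT. A write to the attribute fails with EINVAL for logical devices and for drives that do not report NCQ priority support.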
diff --git a/patches.suse/scsi-smartpqi-Expose-SAS-address-for-SATA-drives.patch b/patches.suse/scsi-smartpqi-Expose-SAS-address-for-SATA-drives.patch
new file mode 100644
index 0000000..3a5ac8f
--- /dev/null
+++ b/patches.suse/scsi-smartpqi-Expose-SAS-address-for-SATA-drives.patch
@@ -0,0 +1,162 @@
+From: Kevin Barnett
+Date: Tue, 1 Feb 2022 15:48:53 -0600
+Subject: scsi: smartpqi: Expose SAS address for SATA drives
+Patch-mainline: v5.18-rc1
+Git-commit: 00598b056aa6d46c7a6819efa850ec9d0d690d76
+References: jsc#PED-1557
+
+Remove UNIQUE_WWID_IN_REPORT_PHYS_LUN PQI feature.
+
+This feature was originally added to solve a problem with NVMe drives, but
+this problem has since been solved a different way, so this PQI feature is
+no longer required for any type of drive.
+
+The kernel was not creating symbolic links in sysfs between SATA drives and
+their enclosure.
+
+The driver was enabling the UNIQUE_WWID_IN_REPORT_PHYS_LUN PQI feature,
+which causes the FW to return a WWID for SATA drives that is derived from a
+unique ID read from the SATA drive itself. The driver was exposing this
+WWID as the drive's SAS address. However, because this SAS address does not
+match the SAS address returned by an enclosure's SES Page 0xA data, the
+Linux kernel was not able to match a SATA drive with its enclosure.
+
+Link: https://lore.kernel.org/r/164375213346.440833.12379222470149882747.stgit@brunhilda.pdev.net
+Reviewed-by: Mike McGowen
+Reviewed-by: Scott Benesh
+Reviewed-by: Scott Teel
+Signed-off-by: Kevin Barnett
+Signed-off-by: Don Brace
+Signed-off-by: Martin K. Petersen
+Acked-by: Martin Wilck
+---
+ drivers/scsi/smartpqi/smartpqi.h | 2 -
+ drivers/scsi/smartpqi/smartpqi_init.c | 43 ++--------------------------------
+ 2 files changed, 3 insertions(+), 42 deletions(-)
+
+--- a/drivers/scsi/smartpqi/smartpqi.h
++++ b/drivers/scsi/smartpqi/smartpqi.h
+@@ -1141,7 +1141,6 @@ struct pqi_scsi_dev {
+ 	struct pqi_stream_data stream_data[NUM_STREAMS_PER_LUN];
+ 	atomic_t scsi_cmds_outstanding;
+ 	atomic_t raid_bypass_cnt;
+-	u8 page_83_identifier[16];
+ };
+
+ /* VPD inquiry pages */
+@@ -1331,7 +1330,6 @@ struct pqi_ctrl_info {
+ 	u8 soft_reset_handshake_supported : 1;
+ 	u8 raid_iu_timeout_supported : 1;
+ 	u8 tmf_iu_timeout_supported : 1;
+-	u8 unique_wwid_in_report_phys_lun_supported : 1;
+ 	u8 firmware_triage_supported : 1;
+ 	u8 rpl_extended_format_4_5_supported : 1;
+ 	u8 enable_r1_writes : 1;
+--- a/drivers/scsi/smartpqi/smartpqi_init.c
++++ b/drivers/scsi/smartpqi/smartpqi_init.c
+@@ -1588,9 +1588,6 @@ static int pqi_get_physical_device_info(
+ 		sizeof(device->phys_connector));
+ 	device->bay = id_phys->phys_bay_in_box;
+
+-	memcpy(&device->page_83_identifier, &id_phys->page_83_identifier,
+-		sizeof(device->page_83_identifier));
+-
+ 	if ((id_phys->even_more_flags & PQI_DEVICE_PHY_MAP_SUPPORTED) &&
+ 		id_phys->phy_count)
+ 		device->phy_id =
+@@ -2281,18 +2278,6 @@ static inline void pqi_mask_device(u8 *s
+ 	scsi3addr[3] |= 0xc0;
+ }
+
+-static inline bool pqi_is_device_with_sas_address(struct pqi_scsi_dev *device)
+-{
+-	switch (device->device_type) {
+-	case SA_DEVICE_TYPE_SAS:
+-	case SA_DEVICE_TYPE_EXPANDER_SMP:
+-	case SA_DEVICE_TYPE_SES:
+-		return true;
+-	}
+-
+-	return false;
+-}
+-
+ static inline bool pqi_is_multipath_device(struct pqi_scsi_dev *device)
+ {
+ 	if (pqi_is_logical_device(device))
+@@ -2306,17 +2291,6 @@ static inline bool pqi_expose_device(str
+ 	return !device->is_physical_device || !pqi_skip_device(device->scsi3addr);
+ }
+
+-static inline void pqi_set_physical_device_wwid(struct pqi_ctrl_info *ctrl_info,
+-	struct pqi_scsi_dev *device, struct report_phys_lun_16byte_wwid *phys_lun)
+-{
+-	if (ctrl_info->unique_wwid_in_report_phys_lun_supported ||
+-		ctrl_info->rpl_extended_format_4_5_supported ||
+-		pqi_is_device_with_sas_address(device))
+-		memcpy(device->wwid, phys_lun->wwid, sizeof(device->wwid));
+-	else
+-		memcpy(&device->wwid[8], device->page_83_identifier, 8);
+-}
+-
+ static int pqi_update_scsi_devices(struct pqi_ctrl_info *ctrl_info)
+ {
+ 	int i;
+@@ -2484,7 +2458,7 @@ static int pqi_update_scsi_devices(struc
+ 			pqi_assign_bus_target_lun(device);
+
+ 		if (device->is_physical_device) {
+-			pqi_set_physical_device_wwid(ctrl_info, device, phys_lun);
++			memcpy(device->wwid, phys_lun->wwid, sizeof(device->wwid));
+ 			if ((phys_lun->device_flags &
+ 				CISS_REPORT_PHYS_DEV_FLAG_AIO_ENABLED) &&
+ 				phys_lun->aio_handle) {
+@@ -2497,8 +2471,7 @@ static int pqi_update_scsi_devices(struc
+ 				sizeof(device->volume_id));
+ 		}
+
+-		if (pqi_is_device_with_sas_address(device))
+-			device->sas_address = get_unaligned_be64(&device->wwid[8]);
++		device->sas_address = get_unaligned_be64(&device->wwid[8]);
+
+ 		new_device_list[num_valid_devices++] = device;
+ 	}
+@@ -7087,7 +7060,7 @@ static ssize_t pqi_sas_address_show(stru
+ 	spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags);
+
+ 	device = sdev->hostdata;
+-	if (!device || !pqi_is_device_with_sas_address(device)) {
++	if (!device) {
+ 		spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
+ 		return -ENODEV;
+ 	}
+@@ -7643,10 +7616,6 @@ static void pqi_ctrl_update_feature_flag
+ 	case PQI_FIRMWARE_FEATURE_TMF_IU_TIMEOUT:
+ 		ctrl_info->tmf_iu_timeout_supported = firmware_feature->enabled;
+ 		break;
+-	case PQI_FIRMWARE_FEATURE_UNIQUE_WWID_IN_REPORT_PHYS_LUN:
+-		ctrl_info->unique_wwid_in_report_phys_lun_supported =
+-			firmware_feature->enabled;
+-		break;
+ 	case PQI_FIRMWARE_FEATURE_FW_TRIAGE:
+ 		ctrl_info->firmware_triage_supported = firmware_feature->enabled;
+ 		pqi_save_fw_triage_setting(ctrl_info, firmware_feature->enabled);
+@@ -7745,11 +7714,6 @@ static struct pqi_firmware_feature pqi_f
+ 		.feature_status = pqi_firmware_feature_status,
+ 	},
+ 	{
+-		.feature_name = "Unique WWID in Report Physical LUN",
+-		.feature_bit = PQI_FIRMWARE_FEATURE_UNIQUE_WWID_IN_REPORT_PHYS_LUN,
+-		.feature_status = pqi_ctrl_update_feature_flags,
+-	},
+-	{
+ 		.feature_name = "Firmware Triage",
+ 		.feature_bit = PQI_FIRMWARE_FEATURE_FW_TRIAGE,
+ 		.feature_status = pqi_ctrl_update_feature_flags,
+@@ -7858,7 +7822,6 @@ static void pqi_ctrl_reset_config(struct
+ 	ctrl_info->enable_r6_writes = false;
+ 	ctrl_info->raid_iu_timeout_supported = false;
+ 	ctrl_info->tmf_iu_timeout_supported = false;
+-	ctrl_info->unique_wwid_in_report_phys_lun_supported = false;
+ 	ctrl_info->firmware_triage_supported = false;
+ 	ctrl_info->rpl_extended_format_4_5_supported = false;
+ }
diff --git a/patches.suse/scsi-smartpqi-Fix-BUILD_BUG_ON-statements.patch b/patches.suse/scsi-smartpqi-Fix-BUILD_BUG_ON-statements.patch
new file mode 100644
index 0000000..b5b9e6f
--- /dev/null
+++ b/patches.suse/scsi-smartpqi-Fix-BUILD_BUG_ON-statements.patch
@@ -0,0 +1,76 @@
+From: Mike McGowen
+Date: Tue, 1 Feb 2022 15:49:03 -0600
+Subject: scsi: smartpqi: Fix BUILD_BUG_ON() statements
+Patch-mainline: v5.18-rc1
+Git-commit: 5e6935864d814c3a62dd0945fd155634481f11c2
+References: jsc#PED-1557
+
+Add calls to the functions at the beginning of driver initialization.
+ +The BUILD_BUG_ON() statements that are currently in functions named +verify_structures() in the modules smartpqi_init.c and smartpqi_sis.c do +not work as currently implemented. + +Link: https://lore.kernel.org/r/164375214355.440833.13129778749209816497.stgit@brunhilda.pdev.net +Reviewed-by: Kevin Barnett +Reviewed-by: Scott Teel +Reviewed-by: Scott Benesh +Signed-off-by: Mike McGowen +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 5 ++++- + drivers/scsi/smartpqi/smartpqi_sis.c | 2 +- + drivers/scsi/smartpqi/smartpqi_sis.h | 1 + + 3 files changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -54,6 +54,7 @@ MODULE_DESCRIPTION("Driver for Microchip + MODULE_VERSION(DRIVER_VERSION); + MODULE_LICENSE("GPL"); + ++static void pqi_verify_structures(void); + static void pqi_take_ctrl_offline(struct pqi_ctrl_info *ctrl_info, + enum pqi_ctrl_shutdown_reason ctrl_shutdown_reason); + static void pqi_ctrl_offline_worker(struct work_struct *work); +@@ -9703,6 +9704,8 @@ static int __init pqi_init(void) + int rc; + + pr_info(DRIVER_NAME "\n"); ++ pqi_verify_structures(); ++ sis_verify_structures(); + + pqi_sas_transport_template = sas_attach_transport(&pqi_sas_transport_functions); + if (!pqi_sas_transport_template) +@@ -9726,7 +9729,7 @@ static void __exit pqi_cleanup(void) + module_init(pqi_init); + module_exit(pqi_cleanup); + +-static void __attribute__((unused)) verify_structures(void) ++static void pqi_verify_structures(void) + { + BUILD_BUG_ON(offsetof(struct pqi_ctrl_registers, + sis_host_to_ctrl_doorbell) != 0x20); +--- a/drivers/scsi/smartpqi/smartpqi_sis.c ++++ b/drivers/scsi/smartpqi/smartpqi_sis.c +@@ -479,7 +479,7 @@ int sis_wait_for_fw_triage_completion(st + return rc; + } + +-static void __attribute__((unused)) verify_structures(void) ++void sis_verify_structures(void) + { + BUILD_BUG_ON(offsetof(struct sis_base_struct, + revision) != 0x0); +--- a/drivers/scsi/smartpqi/smartpqi_sis.h ++++ b/drivers/scsi/smartpqi/smartpqi_sis.h +@@ -12,6 +12,7 @@ + #if !defined(_SMARTPQI_SIS_H) + #define _SMARTPQI_SIS_H + ++void sis_verify_structures(void); + int sis_wait_for_ctrl_ready(struct pqi_ctrl_info *ctrl_info); + int sis_wait_for_ctrl_ready_resume(struct pqi_ctrl_info *ctrl_info); + bool sis_is_firmware_running(struct pqi_ctrl_info *ctrl_info); diff --git a/patches.suse/scsi-smartpqi-Fix-NUMA-node-not-updated-during-init.patch b/patches.suse/scsi-smartpqi-Fix-NUMA-node-not-updated-during-init.patch new file mode 100644 index 0000000..7d793b4 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Fix-NUMA-node-not-updated-during-init.patch @@ -0,0 +1,56 @@ +From: Mike McGowen +Date: Tue, 1 Feb 2022 15:48:58 -0600 +Subject: scsi: smartpqi: Fix NUMA node not updated during init +Patch-mainline: v5.18-rc1 +Git-commit: c52efc9238569038242e28f247546bb5b04dc8a1 +References: jsc#PED-1557 + +Correct NUMA node association when calling pqi_pci_probe(). + +In the function pqi_pci_probe(), the driver makes an OS call to get the +NUMA node associated with a controller. If the call returns that there is +no associated node, the driver attempts to set it to node 0. The problem is +that a different local variable (cp_node) was being used to do this, but +the original local variable (node) was still being used in the call to +pqi_alloc_ctrl_info(). + +The value of "node" is not updated if the conditional after the call to +dev_to_node() evaluates to TRUE. 
+
+Link: https://lore.kernel.org/r/164375213850.440833.5243014942807747074.stgit@brunhilda.pdev.net
+Reviewed-by: Kevin Barnett
+Reviewed-by: Scott Teel
+Signed-off-by: Mike McGowen
+Signed-off-by: Don Brace
+Signed-off-by: Martin K. Petersen
+Acked-by: Martin Wilck
+---
+ drivers/scsi/smartpqi/smartpqi_init.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/scsi/smartpqi/smartpqi_init.c
++++ b/drivers/scsi/smartpqi/smartpqi_init.c
+@@ -8811,7 +8811,7 @@ static int pqi_pci_probe(struct pci_dev
+ 	const struct pci_device_id *id)
+ {
+ 	int rc;
+-	int node, cp_node;
++	int node;
+ 	struct pqi_ctrl_info *ctrl_info;
+
+ 	pqi_print_ctrl_info(pci_dev, id);
+@@ -8830,10 +8830,10 @@ static int pqi_pci_probe(struct pci_dev
+
+ 	node = dev_to_node(&pci_dev->dev);
+ 	if (node == NUMA_NO_NODE) {
+-		cp_node = cpu_to_node(0);
+-		if (cp_node == NUMA_NO_NODE)
+-			cp_node = 0;
+-		set_dev_node(&pci_dev->dev, cp_node);
++		node = cpu_to_node(0);
++		if (node == NUMA_NO_NODE)
++			node = 0;
++		set_dev_node(&pci_dev->dev, node);
+ 	}
+
+ 	ctrl_info = pqi_alloc_ctrl_info(node);
diff --git a/patches.suse/scsi-smartpqi-Fix-PCI-control-linkdown-system-hang.patch b/patches.suse/scsi-smartpqi-Fix-PCI-control-linkdown-system-hang.patch
new file mode 100644
index 0000000..817ea1d
--- /dev/null
+++ b/patches.suse/scsi-smartpqi-Fix-PCI-control-linkdown-system-hang.patch
@@ -0,0 +1,224 @@
+From: Sagar Biradar
+Date: Fri, 8 Jul 2022 13:47:15 -0500
+Subject: scsi: smartpqi: Fix PCI control linkdown system hang
+Patch-mainline: v6.0-rc1
+Git-commit: 331f7e998b20c406e8d3689b1c0d77c6325a5d4b
+References: jsc#PED-1557
+
+Fail all outstanding requests after a PCI linkdown.
+
+Block access to device SCSI attributes during the following conditions:
+
+ "Cable pull" is called PQI_CTRL_SURPRISE_REMOVAL.
+
+ "PCIe Link Down" is called PQI_CTRL_GRACEFUL_REMOVAL.
+
+Block access to device SCSI attributes during these conditions, and also
+in rare instances when the controller goes offline.
+
+Either outstanding requests or the access of SCSI attributes post linkdown
+can lead to a hang.
+
+Post linkdown, the driver does not fail the outstanding requests, leading
+to a long wait before all the I/Os eventually fail.
+
+Access of the SCSI attributes by host applications can also lead to a
+system hang.
+
+Link: https://lore.kernel.org/r/165730603578.177165.4699352086827187263.stgit@brunhilda
+Reviewed-by: Scott Benesh
+Reviewed-by: Scott Teel
+Reviewed-by: Mike McGowen
+Reviewed-by: Kevin Barnett
+Signed-off-by: Sagar Biradar
+Signed-off-by: Don Brace
+Signed-off-by: Martin K.
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi.h | 7 ++++ + drivers/scsi/smartpqi/smartpqi_init.c | 48 +++++++++++++++++++++++++++++++--- + drivers/scsi/smartpqi/smartpqi_sis.c | 2 - + 3 files changed, 52 insertions(+), 5 deletions(-) + +--- a/drivers/scsi/smartpqi/smartpqi.h ++++ b/drivers/scsi/smartpqi/smartpqi.h +@@ -1269,6 +1269,12 @@ struct pqi_event { + #define PQI_CTRL_PRODUCT_REVISION_A 0 + #define PQI_CTRL_PRODUCT_REVISION_B 1 + ++enum pqi_ctrl_removal_state { ++ PQI_CTRL_PRESENT = 0, ++ PQI_CTRL_GRACEFUL_REMOVAL, ++ PQI_CTRL_SURPRISE_REMOVAL ++}; ++ + struct pqi_ctrl_info { + unsigned int ctrl_id; + struct pci_dev *pci_dev; +@@ -1389,6 +1395,7 @@ struct pqi_ctrl_info { + struct work_struct ofa_quiesce_work; + u32 ofa_bytes_requested; + u16 ofa_cancel_reason; ++ enum pqi_ctrl_removal_state ctrl_removal_state; + }; + + enum pqi_ctrl_mode { +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -95,6 +95,7 @@ static void pqi_ofa_free_host_buffer(str + static int pqi_ofa_host_memory_update(struct pqi_ctrl_info *ctrl_info); + static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info, + struct pqi_scsi_dev *device, u8 lun, unsigned long timeout_msecs); ++static void pqi_fail_all_outstanding_requests(struct pqi_ctrl_info *ctrl_info); + + /* for flags argument to pqi_submit_raid_request_synchronous() */ + #define PQI_SYNC_FLAGS_INTERRUPTABLE 0x1 +@@ -6157,9 +6158,11 @@ static int pqi_device_wait_for_pending_i + warning_timeout = (PQI_PENDING_IO_WARNING_TIMEOUT_SECS * HZ) + start_jiffies; + + while ((cmds_outstanding = atomic_read(&device->scsi_cmds_outstanding[lun])) > 0) { +- pqi_check_ctrl_health(ctrl_info); +- if (pqi_ctrl_offline(ctrl_info)) +- return -ENXIO; ++ if (ctrl_info->ctrl_removal_state != PQI_CTRL_GRACEFUL_REMOVAL) { ++ pqi_check_ctrl_health(ctrl_info); ++ if (pqi_ctrl_offline(ctrl_info)) ++ return -ENXIO; ++ } + msecs_waiting = jiffies_to_msecs(jiffies - start_jiffies); + if (msecs_waiting >= timeout_msecs) { + dev_err(&ctrl_info->pci_dev->dev, +@@ -6945,6 +6948,9 @@ static ssize_t pqi_unique_id_show(struct + sdev = to_scsi_device(dev); + ctrl_info = shost_to_hba(sdev->host); + ++ if (pqi_ctrl_offline(ctrl_info)) ++ return -ENODEV; ++ + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + + device = sdev->hostdata; +@@ -6981,6 +6987,9 @@ static ssize_t pqi_lunid_show(struct dev + sdev = to_scsi_device(dev); + ctrl_info = shost_to_hba(sdev->host); + ++ if (pqi_ctrl_offline(ctrl_info)) ++ return -ENODEV; ++ + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + + device = sdev->hostdata; +@@ -7016,6 +7025,9 @@ static ssize_t pqi_path_info_show(struct + sdev = to_scsi_device(dev); + ctrl_info = shost_to_hba(sdev->host); + ++ if (pqi_ctrl_offline(ctrl_info)) ++ return -ENODEV; ++ + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + + device = sdev->hostdata; +@@ -7093,6 +7105,9 @@ static ssize_t pqi_sas_address_show(stru + sdev = to_scsi_device(dev); + ctrl_info = shost_to_hba(sdev->host); + ++ if (pqi_ctrl_offline(ctrl_info)) ++ return -ENODEV; ++ + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + + device = sdev->hostdata; +@@ -7119,6 +7134,9 @@ static ssize_t pqi_ssd_smart_path_enable + sdev = to_scsi_device(dev); + ctrl_info = shost_to_hba(sdev->host); + ++ if (pqi_ctrl_offline(ctrl_info)) ++ return -ENODEV; ++ + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + + device = sdev->hostdata; +@@ -7148,6 +7166,9 @@ static ssize_t 
pqi_raid_level_show(struc + sdev = to_scsi_device(dev); + ctrl_info = shost_to_hba(sdev->host); + ++ if (pqi_ctrl_offline(ctrl_info)) ++ return -ENODEV; ++ + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + + device = sdev->hostdata; +@@ -7178,6 +7199,9 @@ static ssize_t pqi_raid_bypass_cnt_show( + sdev = to_scsi_device(dev); + ctrl_info = shost_to_hba(sdev->host); + ++ if (pqi_ctrl_offline(ctrl_info)) ++ return -ENODEV; ++ + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + + device = sdev->hostdata; +@@ -7205,6 +7229,9 @@ static ssize_t pqi_sas_ncq_prio_enable_s + sdev = to_scsi_device(dev); + ctrl_info = shost_to_hba(sdev->host); + ++ if (pqi_ctrl_offline(ctrl_info)) ++ return -ENODEV; ++ + spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + + device = sdev->hostdata; +@@ -8547,7 +8574,6 @@ static void pqi_free_interrupts(struct p + + static void pqi_free_ctrl_resources(struct pqi_ctrl_info *ctrl_info) + { +- pqi_stop_heartbeat_timer(ctrl_info); + pqi_free_interrupts(ctrl_info); + if (ctrl_info->queue_memory_base) + dma_free_coherent(&ctrl_info->pci_dev->dev, +@@ -8572,8 +8598,15 @@ static void pqi_free_ctrl_resources(stru + + static void pqi_remove_ctrl(struct pqi_ctrl_info *ctrl_info) + { ++ ctrl_info->controller_online = false; ++ pqi_stop_heartbeat_timer(ctrl_info); ++ pqi_ctrl_block_requests(ctrl_info); + pqi_cancel_rescan_worker(ctrl_info); + pqi_cancel_update_time_worker(ctrl_info); ++ if (ctrl_info->ctrl_removal_state == PQI_CTRL_SURPRISE_REMOVAL) { ++ pqi_fail_all_outstanding_requests(ctrl_info); ++ ctrl_info->pqi_mode_enabled = false; ++ } + pqi_remove_all_scsi_devices(ctrl_info); + pqi_unregister_scsi(ctrl_info); + if (ctrl_info->pqi_mode_enabled) +@@ -8914,11 +8947,18 @@ error: + static void pqi_pci_remove(struct pci_dev *pci_dev) + { + struct pqi_ctrl_info *ctrl_info; ++ u16 vendor_id; + + ctrl_info = pci_get_drvdata(pci_dev); + if (!ctrl_info) + return; + ++ pci_read_config_word(ctrl_info->pci_dev, PCI_SUBSYSTEM_VENDOR_ID, &vendor_id); ++ if (vendor_id == 0xffff) ++ ctrl_info->ctrl_removal_state = PQI_CTRL_SURPRISE_REMOVAL; ++ else ++ ctrl_info->ctrl_removal_state = PQI_CTRL_GRACEFUL_REMOVAL; ++ + pqi_remove_ctrl(ctrl_info); + } + +--- a/drivers/scsi/smartpqi/smartpqi_sis.c ++++ b/drivers/scsi/smartpqi/smartpqi_sis.c +@@ -138,7 +138,7 @@ bool sis_is_firmware_running(struct pqi_ + + status = readl(&ctrl_info->registers->sis_firmware_status); + +- if (status & SIS_CTRL_KERNEL_PANIC) ++ if (status != ~0 && (status & SIS_CTRL_KERNEL_PANIC)) + running = false; + else + running = true; diff --git a/patches.suse/scsi-smartpqi-Fix-RAID-map-race-condition.patch b/patches.suse/scsi-smartpqi-Fix-RAID-map-race-condition.patch new file mode 100644 index 0000000..b092c60 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Fix-RAID-map-race-condition.patch @@ -0,0 +1,176 @@ +From: Kevin Barnett +Date: Fri, 8 Jul 2022 13:47:41 -0500 +Subject: scsi: smartpqi: Fix RAID map race condition +Patch-mainline: v6.0-rc1 +Git-commit: 6ce3cfb365ebb2b93ee547318c6a108e62c740a1 +References: jsc#PED-1557 + +Correct a rare stale RAID map access when performing AIO during a RAID +configuration change. + +A race condition in the driver could cause it to access a stale RAID map +when a logical volume is reconfigured. + +Modify the driver logic to invalidate a RAID map very early when a RAID +configuration change is detected and only switch to a new RAID map after +the driver detects that the RAID map has changed. 
+ +Link: https://lore.kernel.org/r/165730606128.177165.7671413443814750829.stgit@brunhilda +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Reviewed-by: Mike McGowen +Signed-off-by: Kevin Barnett +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 110 +++++++++++++++++++++------------- + 1 file changed, 71 insertions(+), 39 deletions(-) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -2026,6 +2026,23 @@ static void pqi_dev_info(struct pqi_ctrl + dev_info(&ctrl_info->pci_dev->dev, "%s %s\n", action, buffer); + } + ++static bool pqi_raid_maps_equal(struct raid_map *raid_map1, struct raid_map *raid_map2) ++{ ++ u32 raid_map1_size; ++ u32 raid_map2_size; ++ ++ if (raid_map1 == NULL || raid_map2 == NULL) ++ return raid_map1 == raid_map2; ++ ++ raid_map1_size = get_unaligned_le32(&raid_map1->structure_size); ++ raid_map2_size = get_unaligned_le32(&raid_map2->structure_size); ++ ++ if (raid_map1_size != raid_map2_size) ++ return false; ++ ++ return memcmp(raid_map1, raid_map2, raid_map1_size) == 0; ++} ++ + /* Assumes the SCSI device list lock is held. */ + + static void pqi_scsi_update_device(struct pqi_ctrl_info *ctrl_info, +@@ -2039,52 +2056,51 @@ static void pqi_scsi_update_device(struc + existing_device->target_lun_valid = true; + } + +- if (pqi_is_logical_device(existing_device) && +- ctrl_info->logical_volume_rescan_needed) +- existing_device->rescan = true; +- + /* By definition, the scsi3addr and wwid fields are already the same. */ + + existing_device->is_physical_device = new_device->is_physical_device; +- existing_device->is_external_raid_device = +- new_device->is_external_raid_device; +- existing_device->is_expander_smp_device = +- new_device->is_expander_smp_device; +- existing_device->aio_enabled = new_device->aio_enabled; +- memcpy(existing_device->vendor, new_device->vendor, +- sizeof(existing_device->vendor)); +- memcpy(existing_device->model, new_device->model, +- sizeof(existing_device->model)); ++ memcpy(existing_device->vendor, new_device->vendor, sizeof(existing_device->vendor)); ++ memcpy(existing_device->model, new_device->model, sizeof(existing_device->model)); + existing_device->sas_address = new_device->sas_address; +- existing_device->raid_level = new_device->raid_level; + existing_device->queue_depth = new_device->queue_depth; +- existing_device->aio_handle = new_device->aio_handle; +- existing_device->volume_status = new_device->volume_status; +- existing_device->active_path_index = new_device->active_path_index; +- existing_device->phy_id = new_device->phy_id; +- existing_device->path_map = new_device->path_map; +- existing_device->bay = new_device->bay; +- existing_device->box_index = new_device->box_index; +- existing_device->phys_box_on_bus = new_device->phys_box_on_bus; +- existing_device->phy_connected_dev_type = new_device->phy_connected_dev_type; +- existing_device->multi_lun_device_lun_count = new_device->multi_lun_device_lun_count; +- if (!existing_device->multi_lun_device_lun_count) +- existing_device->multi_lun_device_lun_count = 1; +- memcpy(existing_device->box, new_device->box, +- sizeof(existing_device->box)); +- memcpy(existing_device->phys_connector, new_device->phys_connector, +- sizeof(existing_device->phys_connector)); +- memset(existing_device->next_bypass_group, 0, sizeof(existing_device->next_bypass_group)); +- kfree(existing_device->raid_map); +- existing_device->raid_map = new_device->raid_map; +- 
existing_device->raid_bypass_configured = +- new_device->raid_bypass_configured; +- existing_device->raid_bypass_enabled = +- new_device->raid_bypass_enabled; + existing_device->device_offline = false; + +- /* To prevent this from being freed later. */ +- new_device->raid_map = NULL; ++ if (pqi_is_logical_device(existing_device)) { ++ existing_device->is_external_raid_device = new_device->is_external_raid_device; ++ ++ if (existing_device->devtype == TYPE_DISK) { ++ existing_device->raid_level = new_device->raid_level; ++ existing_device->volume_status = new_device->volume_status; ++ if (ctrl_info->logical_volume_rescan_needed) ++ existing_device->rescan = true; ++ memset(existing_device->next_bypass_group, 0, sizeof(existing_device->next_bypass_group)); ++ if (!pqi_raid_maps_equal(existing_device->raid_map, new_device->raid_map)) { ++ kfree(existing_device->raid_map); ++ existing_device->raid_map = new_device->raid_map; ++ /* To prevent this from being freed later. */ ++ new_device->raid_map = NULL; ++ } ++ existing_device->raid_bypass_configured = new_device->raid_bypass_configured; ++ existing_device->raid_bypass_enabled = new_device->raid_bypass_enabled; ++ } ++ } else { ++ existing_device->aio_enabled = new_device->aio_enabled; ++ existing_device->aio_handle = new_device->aio_handle; ++ existing_device->is_expander_smp_device = new_device->is_expander_smp_device; ++ existing_device->active_path_index = new_device->active_path_index; ++ existing_device->phy_id = new_device->phy_id; ++ existing_device->path_map = new_device->path_map; ++ existing_device->bay = new_device->bay; ++ existing_device->box_index = new_device->box_index; ++ existing_device->phys_box_on_bus = new_device->phys_box_on_bus; ++ existing_device->phy_connected_dev_type = new_device->phy_connected_dev_type; ++ memcpy(existing_device->box, new_device->box, sizeof(existing_device->box)); ++ memcpy(existing_device->phys_connector, new_device->phys_connector, sizeof(existing_device->phys_connector)); ++ ++ existing_device->multi_lun_device_lun_count = new_device->multi_lun_device_lun_count; ++ if (existing_device->multi_lun_device_lun_count == 0) ++ existing_device->multi_lun_device_lun_count = 1; ++ } + } + + static inline void pqi_free_device(struct pqi_scsi_dev *device) +@@ -3675,6 +3691,20 @@ static bool pqi_ofa_process_event(struct + return ack_event; + } + ++static void pqi_disable_raid_bypass(struct pqi_ctrl_info *ctrl_info) ++{ ++ unsigned long flags; ++ struct pqi_scsi_dev *device; ++ ++ spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); ++ ++ list_for_each_entry(device, &ctrl_info->scsi_device_list, scsi_device_list_entry) ++ if (device->raid_bypass_enabled) ++ device->raid_bypass_enabled = false; ++ ++ spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); ++} ++ + static void pqi_event_worker(struct work_struct *work) + { + unsigned int i; +@@ -3702,6 +3732,8 @@ static void pqi_event_worker(struct work + rescan_needed = true; + if (event->event_type == PQI_EVENT_TYPE_LOGICAL_DEVICE) + ctrl_info->logical_volume_rescan_needed = true; ++ else if (event->event_type == PQI_EVENT_TYPE_AIO_STATE_CHANGE) ++ pqi_disable_raid_bypass(ctrl_info); + } + if (ack_event) + pqi_acknowledge_event(ctrl_info, event); diff --git a/patches.suse/scsi-smartpqi-Fix-a-name-typo-and-cleanup-code.patch b/patches.suse/scsi-smartpqi-Fix-a-name-typo-and-cleanup-code.patch new file mode 100644 index 0000000..a3e3330 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Fix-a-name-typo-and-cleanup-code.patch @@ -0,0 +1,65 @@ 
+From: Kevin Barnett
+Date: Tue, 1 Feb 2022 15:48:18 -0600
+Subject: scsi: smartpqi: Fix a name typo and cleanup code
+Patch-mainline: v5.18-rc1
+Git-commit: b4dc06a9070e3ca9d18a33fe649df594832dde1a
+References: jsc#PED-1557
+
+Rename the function pqi_is_io_high_prioity() to pqi_is_io_high_priority().
+Remove 2 unnecessary lines from the function and adjust the white space.
+
+Link: https://lore.kernel.org/r/164375209818.440833.10908948825731635853.stgit@brunhilda.pdev.net
+Reviewed-by: Mike McGowen
+Reviewed-by: Scott Benesh
+Reviewed-by: Scott Teel
+Signed-off-by: Kevin Barnett
+Signed-off-by: Don Brace
+Signed-off-by: Martin K. Petersen
+Acked-by: Martin Wilck
+---
+ drivers/scsi/smartpqi/smartpqi_init.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/drivers/scsi/smartpqi/smartpqi_init.c
++++ b/drivers/scsi/smartpqi/smartpqi_init.c
+@@ -5581,18 +5581,19 @@ static void pqi_aio_io_complete(struct p
+ 	pqi_scsi_done(scmd);
+ }
+
+-static inline bool pqi_is_io_high_prioity(struct pqi_ctrl_info *ctrl_info,
++static inline bool pqi_is_io_high_priority(struct pqi_ctrl_info *ctrl_info,
+ 	struct pqi_scsi_dev *device, struct scsi_cmnd *scmd)
+ {
+ 	bool io_high_prio;
+ 	int priority_class;
+
+ 	io_high_prio = false;
++
+ 	if (device->ncq_prio_enable) {
+ 		priority_class =
+ 			IOPRIO_PRIO_CLASS(req_get_ioprio(scsi_cmd_to_rq(scmd)));
+ 		if (priority_class == IOPRIO_CLASS_RT) {
+-			/* set NCQ priority for read/write command */
++			/* Set NCQ priority for read/write commands. */
+ 			switch (scmd->cmnd[0]) {
+ 			case WRITE_16:
+ 			case READ_16:
+@@ -5604,8 +5605,6 @@ static inline bool pqi_is_io_high_prioit
+ 			case READ_6:
+ 				io_high_prio = true;
+ 				break;
+-			default:
+-				break;
+ 			}
+ 		}
+ 	}
+@@ -5619,7 +5618,8 @@ static inline int pqi_aio_submit_scsi_cm
+ {
+ 	bool io_high_prio;
+
+-	io_high_prio = pqi_is_io_high_prioity(ctrl_info, device, scmd);
++	io_high_prio = pqi_is_io_high_priority(ctrl_info, device, scmd);
+
+ 	return pqi_aio_submit_io(ctrl_info, scmd, device->aio_handle,
+ 		scmd->cmnd, scmd->cmd_len, queue_group, NULL,
+ 		false, io_high_prio);
diff --git a/patches.suse/scsi-smartpqi-Fix-a-typo-in-func-pqi_aio_submit_io.patch b/patches.suse/scsi-smartpqi-Fix-a-typo-in-func-pqi_aio_submit_io.patch
new file mode 100644
index 0000000..f5e9ce2
--- /dev/null
+++ b/patches.suse/scsi-smartpqi-Fix-a-typo-in-func-pqi_aio_submit_io.patch
@@ -0,0 +1,38 @@
+From: Kevin Barnett
+Date: Tue, 1 Feb 2022 15:48:23 -0600
+Subject: scsi: smartpqi: Fix a typo in func pqi_aio_submit_io()
+Patch-mainline: v5.18-rc1
+Git-commit: 9e98e60bfca341f5f1bf425dbf68cb1a96b323c9
+References: jsc#PED-1557
+
+Use correct pqi_aio_path_request structure to calculate the offset to
+sg_descriptors.
+
+The function pqi_aio_submit_io() uses the pqi_raid_path_request structure
+to calculate the offset of the structure member sg_descriptors. This is
+incorrect. It should be using the pqi_aio_path_request structure instead.
+
+This typo is benign because the offsets are the same in both structures.
+
+Link: https://lore.kernel.org/r/164375210321.440833.2566086558909686629.stgit@brunhilda.pdev.net
+Reviewed-by: Mike McGowen
+Reviewed-by: Scott Teel
+Signed-off-by: Kevin Barnett
+Signed-off-by: Don Brace
+Signed-off-by: Martin K.
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -5641,7 +5641,7 @@ static int pqi_aio_submit_io(struct pqi_ + io_request->raid_bypass = raid_bypass; + + request = io_request->iu; +- memset(request, 0, offsetof(struct pqi_raid_path_request, sg_descriptors)); ++ memset(request, 0, offsetof(struct pqi_aio_path_request, sg_descriptors)); + + request->header.iu_type = PQI_REQUEST_IU_AIO_PATH_IO; + put_unaligned_le32(aio_handle, &request->nexus_id); diff --git a/patches.suse/scsi-smartpqi-Fix-hibernate-and-suspend.patch b/patches.suse/scsi-smartpqi-Fix-hibernate-and-suspend.patch new file mode 100644 index 0000000..dd068e8 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Fix-hibernate-and-suspend.patch @@ -0,0 +1,194 @@ +From: Kevin Barnett +Date: Tue, 1 Feb 2022 15:49:08 -0600 +Subject: scsi: smartpqi: Fix hibernate and suspend +Patch-mainline: v5.18-rc1 +Git-commit: c66e078ad89e9f171a2474b255284d95c54c4c36 +References: jsc#PED-1557 + +Restructure the hibernate/suspend code to allow workarounds for the +controller boot differences. + +Newer controllers have subtle differences in the way that they boot up. + +Link: https://lore.kernel.org/r/164375214859.440833.14683009064111314948.stgit@brunhilda.pdev.net +Reviewed-by: Mike McGowen +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Signed-off-by: Kevin Barnett +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 116 +++++++++++++++++++++++----------- + 1 file changed, 81 insertions(+), 35 deletions(-) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -8955,15 +8955,16 @@ static inline enum bmic_flush_cache_shut + { + if (pci_dev->subsystem_vendor == PCI_VENDOR_ID_ADAPTEC2 && pci_dev->subsystem_device == 0x1304) + return RESTART; ++ + return SUSPEND; + } + +-static __maybe_unused int pqi_suspend(struct pci_dev *pci_dev, pm_message_t state) ++static int pqi_suspend_or_freeze(struct device *dev, bool suspend) + { ++ struct pci_dev *pci_dev; + struct pqi_ctrl_info *ctrl_info; +- enum bmic_flush_cache_shutdown_event shutdown_event; + +- shutdown_event = pqi_get_flush_cache_shutdown_event(pci_dev); ++ pci_dev = to_pci_dev(dev); + ctrl_info = pci_get_drvdata(pci_dev); + + pqi_wait_until_ofa_finished(ctrl_info); +@@ -8973,16 +8974,17 @@ static __maybe_unused int pqi_suspend(st + pqi_ctrl_block_device_reset(ctrl_info); + pqi_ctrl_block_requests(ctrl_info); + pqi_ctrl_wait_until_quiesced(ctrl_info); +- pqi_flush_cache(ctrl_info, shutdown_event); +- pqi_stop_heartbeat_timer(ctrl_info); + +- pqi_crash_if_pending_command(ctrl_info); ++ if (suspend) { ++ enum bmic_flush_cache_shutdown_event shutdown_event; + +- if (state.event == PM_EVENT_FREEZE) +- return 0; ++ shutdown_event = pqi_get_flush_cache_shutdown_event(pci_dev); ++ pqi_flush_cache(ctrl_info, shutdown_event); ++ } + +- pci_save_state(pci_dev); +- pci_set_power_state(pci_dev, pci_choose_state(pci_dev, state)); ++ pqi_stop_heartbeat_timer(ctrl_info); ++ pqi_crash_if_pending_command(ctrl_info); ++ pqi_free_irqs(ctrl_info); + + ctrl_info->controller_online = false; + ctrl_info->pqi_mode_enabled = false; +@@ -8990,44 +8992,87 @@ static __maybe_unused int pqi_suspend(st + return 0; + } + +-static __maybe_unused int pqi_resume(struct pci_dev *pci_dev) ++static __maybe_unused int pqi_suspend(struct device 
*dev) ++{ ++ return pqi_suspend_or_freeze(dev, true); ++} ++ ++static int pqi_resume_or_restore(struct device *dev) + { + int rc; ++ struct pci_dev *pci_dev; + struct pqi_ctrl_info *ctrl_info; + ++ pci_dev = to_pci_dev(dev); + ctrl_info = pci_get_drvdata(pci_dev); + +- if (pci_dev->current_state != PCI_D0) { +- ctrl_info->max_hw_queue_index = 0; +- pqi_free_interrupts(ctrl_info); +- pqi_change_irq_mode(ctrl_info, IRQ_MODE_INTX); +- rc = request_irq(pci_irq_vector(pci_dev, 0), pqi_irq_handler, +- IRQF_SHARED, DRIVER_NAME_SHORT, +- &ctrl_info->queue_groups[0]); +- if (rc) { +- dev_err(&ctrl_info->pci_dev->dev, +- "irq %u init failed with error %d\n", +- pci_dev->irq, rc); +- return rc; +- } +- pqi_ctrl_unblock_device_reset(ctrl_info); +- pqi_ctrl_unblock_requests(ctrl_info); +- pqi_scsi_unblock_requests(ctrl_info); +- pqi_ctrl_unblock_scan(ctrl_info); +- return 0; +- } +- +- pci_set_power_state(pci_dev, PCI_D0); +- pci_restore_state(pci_dev); ++ rc = pqi_request_irqs(ctrl_info); ++ if (rc) ++ return rc; + + pqi_ctrl_unblock_device_reset(ctrl_info); + pqi_ctrl_unblock_requests(ctrl_info); + pqi_scsi_unblock_requests(ctrl_info); + pqi_ctrl_unblock_scan(ctrl_info); + ++ ssleep(PQI_POST_RESET_DELAY_SECS); ++ + return pqi_ctrl_init_resume(ctrl_info); + } + ++static int pqi_freeze(struct device *dev) ++{ ++ return pqi_suspend_or_freeze(dev, false); ++} ++ ++static int pqi_thaw(struct device *dev) ++{ ++ int rc; ++ struct pci_dev *pci_dev; ++ struct pqi_ctrl_info *ctrl_info; ++ ++ pci_dev = to_pci_dev(dev); ++ ctrl_info = pci_get_drvdata(pci_dev); ++ ++ rc = pqi_request_irqs(ctrl_info); ++ if (rc) ++ return rc; ++ ++ ctrl_info->controller_online = true; ++ ctrl_info->pqi_mode_enabled = true; ++ ++ pqi_ctrl_unblock_device_reset(ctrl_info); ++ pqi_ctrl_unblock_requests(ctrl_info); ++ pqi_scsi_unblock_requests(ctrl_info); ++ pqi_ctrl_unblock_scan(ctrl_info); ++ ++ return 0; ++} ++ ++static int pqi_poweroff(struct device *dev) ++{ ++ struct pci_dev *pci_dev; ++ struct pqi_ctrl_info *ctrl_info; ++ enum bmic_flush_cache_shutdown_event shutdown_event; ++ ++ pci_dev = to_pci_dev(dev); ++ ctrl_info = pci_get_drvdata(pci_dev); ++ ++ shutdown_event = pqi_get_flush_cache_shutdown_event(pci_dev); ++ pqi_flush_cache(ctrl_info, shutdown_event); ++ ++ return 0; ++} ++ ++static const struct dev_pm_ops pqi_pm_ops = { ++ .suspend = pqi_suspend, ++ .resume = pqi_resume_or_restore, ++ .freeze = pqi_freeze, ++ .thaw = pqi_thaw, ++ .poweroff = pqi_poweroff, ++ .restore = pqi_resume_or_restore, ++}; ++ + /* Define the PCI IDs for the controllers that we support. */ + static const struct pci_device_id pqi_pci_id_table[] = { + { +@@ -9694,8 +9739,9 @@ static struct pci_driver pqi_pci_driver + .remove = pqi_pci_remove, + .shutdown = pqi_shutdown, + #if defined(CONFIG_PM) +- .suspend = pqi_suspend, +- .resume = pqi_resume, ++ .driver = { ++ .pm = &pqi_pm_ops ++ }, + #endif + }; + diff --git a/patches.suse/scsi-smartpqi-Fix-kdump-issue-when-controller-is-loc.patch b/patches.suse/scsi-smartpqi-Fix-kdump-issue-when-controller-is-loc.patch new file mode 100644 index 0000000..3adde31 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Fix-kdump-issue-when-controller-is-loc.patch @@ -0,0 +1,93 @@ +From: Mahesh Rajashekhara +Date: Tue, 1 Feb 2022 15:48:43 -0600 +Subject: scsi: smartpqi: Fix kdump issue when controller is locked up +Patch-mainline: v5.18-rc1 +Git-commit: 3ada501d602abf02353445c03bb3258146445d90 +References: jsc#PED-1557 + +Avoid dropping into shell if the controller is in locked up state. 
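The decisive input is the kernel's global reset_devices flag, which is
non-zero when the crash (kdump) kernel was booted with the reset_devices
parameter. A condensed, hypothetical sketch of the error path this patch
adds to pqi_ctrl_init() (the helper name is invented for illustration; the
real code inlines this and routes through pqi_lockup_action, as the hunks
below show):

	#include <linux/reboot.h>	/* emergency_restart(), reset_devices */

	/* If the controller never becomes ready in a kdump kernel, assume a
	 * firmware lockup and reboot rather than failing the probe and
	 * leaving the user in the kdump shell. */
	static int pqi_wait_ready_or_reboot(struct pqi_ctrl_info *ctrl_info)
	{
		int rc;

		rc = sis_wait_for_ctrl_ready(ctrl_info);
		if (rc && reset_devices) {
			dev_err(&ctrl_info->pci_dev->dev,
				"kdump init failed with error %d\n", rc);
			emergency_restart();	/* pqi_lockup_action = REBOOT */
		}

		return rc;
	}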
+
+The driver issues a SIS soft reset to bring the controller back to SIS
+mode while the OS boots into kdump mode.
+
+If the controller is in a lockup state, the SIS soft reset does not work.
+
+Since the controller lockup code has not been cleared, the driver considers
+the firmware to be no longer up and running. The driver returns an error
+code to the OS and the kdump fails.
+
+Link: https://lore.kernel.org/r/164375212337.440833.11955356190354940369.stgit@brunhilda.pdev.net
+Reviewed-by: Kevin Barnett
+Reviewed-by: Scott Benesh
+Reviewed-by: Scott Teel
+Signed-off-by: Mahesh Rajashekhara
+Signed-off-by: Don Brace
+Signed-off-by: Martin K. Petersen
+Acked-by: Martin Wilck
+---
+ drivers/scsi/smartpqi/smartpqi_init.c | 39 ++++++++++++++++++++--------------
+ 1 file changed, 23 insertions(+), 16 deletions(-)
+
+--- a/drivers/scsi/smartpqi/smartpqi_init.c
++++ b/drivers/scsi/smartpqi/smartpqi_init.c
+@@ -7986,6 +7986,21 @@ static int pqi_force_sis_mode(struct pqi
+ 	return pqi_revert_to_sis_mode(ctrl_info);
+ }
+
++static void pqi_perform_lockup_action(void)
++{
++	switch (pqi_lockup_action) {
++	case PANIC:
++		panic("FATAL: Smart Family Controller lockup detected");
++		break;
++	case REBOOT:
++		emergency_restart();
++		break;
++	case NONE:
++	default:
++		break;
++	}
++}
++
+ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info)
+ {
+ 	int rc;
+@@ -8010,8 +8025,15 @@ static int pqi_ctrl_init(struct pqi_ctrl
+ 	 * commands.
+ 	 */
+ 	rc = sis_wait_for_ctrl_ready(ctrl_info);
+-	if (rc)
++	if (rc) {
++		if (reset_devices) {
++			dev_err(&ctrl_info->pci_dev->dev,
++				"kdump init failed with error %d\n", rc);
++			pqi_lockup_action = REBOOT;
++			pqi_perform_lockup_action();
++		}
+ 		return rc;
++	}
+
+ 	/*
+ 	 * Get the controller properties. This allows us to determine
+@@ -8736,21 +8758,6 @@ static int pqi_ofa_ctrl_restart(struct p
+ 	return pqi_ctrl_init_resume(ctrl_info);
+ }
+
+-static void pqi_perform_lockup_action(void)
+-{
+-	switch (pqi_lockup_action) {
+-	case PANIC:
+-		panic("FATAL: Smart Family Controller lockup detected");
+-		break;
+-	case REBOOT:
+-		emergency_restart();
+-		break;
+-	case NONE:
+-	default:
+-		break;
+-	}
+-}
+-
+ static struct pqi_raid_error_info pqi_ctrl_offline_raid_error_info = {
+ 	.data_out_result = PQI_DATA_IN_OUT_HARDWARE_ERROR,
+ 	.status = SAM_STAT_CHECK_CONDITION,
diff --git a/patches.suse/scsi-smartpqi-Fix-lsscsi-t-SAS-addresses.patch b/patches.suse/scsi-smartpqi-Fix-lsscsi-t-SAS-addresses.patch
new file mode 100644
index 0000000..dfc8a50
--- /dev/null
+++ b/patches.suse/scsi-smartpqi-Fix-lsscsi-t-SAS-addresses.patch
@@ -0,0 +1,49 @@
+From: Kevin Barnett
+Date: Tue, 1 Feb 2022 15:49:13 -0600
+Subject: scsi: smartpqi: Fix lsscsi -t SAS addresses
+Patch-mainline: v5.18-rc1
+Git-commit: 291c2e0071efbda9d5c360a793abee4055e81fea
+References: jsc#PED-1557
+
+Correct lsscsi -t output for newer controllers that support 16-byte WWID in
+the SAS address field. lsscsi -t was displaying all zeros for SAS
+addresses.
+
+When we added support to smartpqi for 16-byte WWIDs in the RPL data for
+newer controllers, we were copying the wrong part of the 16-byte WWID to
+the SAS address field.
+
+Link: https://lore.kernel.org/r/164375215363.440833.7298523719813806902.stgit@brunhilda.pdev.net
+Reviewed-by: Scott Benesh
+Reviewed-by: Scott Teel
+Reviewed-by: Mike McGowen
+Signed-off-by: Kevin Barnett
+Signed-off-by: Don Brace
+Signed-off-by: Martin K.
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -1182,8 +1182,8 @@ static inline int pqi_report_phys_luns(s + + for (i = 0; i < num_physicals; i++) { + memcpy(&rpl_16byte_wwid_list->lun_entries[i].lunid, &rpl_8byte_wwid_list->lun_entries[i].lunid, sizeof(rpl_8byte_wwid_list->lun_entries[i].lunid)); +- memset(&rpl_16byte_wwid_list->lun_entries[i].wwid, 0, 8); +- memcpy(&rpl_16byte_wwid_list->lun_entries[i].wwid[8], &rpl_8byte_wwid_list->lun_entries[i].wwid, sizeof(rpl_8byte_wwid_list->lun_entries[i].wwid)); ++ memcpy(&rpl_16byte_wwid_list->lun_entries[i].wwid[0], &rpl_8byte_wwid_list->lun_entries[i].wwid, sizeof(rpl_8byte_wwid_list->lun_entries[i].wwid)); ++ memset(&rpl_16byte_wwid_list->lun_entries[i].wwid[8], 0, 8); + rpl_16byte_wwid_list->lun_entries[i].device_type = rpl_8byte_wwid_list->lun_entries[i].device_type; + rpl_16byte_wwid_list->lun_entries[i].device_flags = rpl_8byte_wwid_list->lun_entries[i].device_flags; + rpl_16byte_wwid_list->lun_entries[i].lun_count = rpl_8byte_wwid_list->lun_entries[i].lun_count; +@@ -2472,7 +2472,7 @@ static int pqi_update_scsi_devices(struc + sizeof(device->volume_id)); + } + +- device->sas_address = get_unaligned_be64(&device->wwid[8]); ++ device->sas_address = get_unaligned_be64(&device->wwid[0]); + + new_device_list[num_valid_devices++] = device; + } diff --git a/patches.suse/scsi-smartpqi-Fix-rmmod-stack-trace.patch b/patches.suse/scsi-smartpqi-Fix-rmmod-stack-trace.patch new file mode 100644 index 0000000..7a0ad8b --- /dev/null +++ b/patches.suse/scsi-smartpqi-Fix-rmmod-stack-trace.patch @@ -0,0 +1,44 @@ +From: Don Brace +Date: Tue, 1 Feb 2022 15:47:53 -0600 +Subject: scsi: smartpqi: Fix rmmod stack trace +Patch-mainline: v5.18-rc1 +Git-commit: c4ff687d25c05919382a759503bd3821689f4e2f +References: jsc#PED-1557 + +Prevent "BUG: scheduling while atomic: rmmod" stack trace. + +Stop setting spin_locks before calling OS functions to remove devices. + +Link: https://lore.kernel.org/r/164375207296.440833.4996145011193819683.stgit@brunhilda.pdev.net +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Reviewed-by: Kevin Barnett +Signed-off-by: Don Brace +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -2513,17 +2513,15 @@ static void pqi_remove_all_scsi_devices( + struct pqi_scsi_dev *device; + struct pqi_scsi_dev *next; + +- spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); +- + list_for_each_entry_safe(device, next, &ctrl_info->scsi_device_list, + scsi_device_list_entry) { + if (pqi_is_device_added(device)) + pqi_remove_device(ctrl_info, device); ++ spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); + list_del(&device->scsi_device_list_entry); + pqi_free_device(device); ++ spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); + } +- +- spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); + } + + static int pqi_scan_scsi_devices(struct pqi_ctrl_info *ctrl_info) diff --git a/patches.suse/scsi-smartpqi-Fix-typo-in-comment.patch b/patches.suse/scsi-smartpqi-Fix-typo-in-comment.patch new file mode 100644 index 0000000..2e2dae4 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Fix-typo-in-comment.patch @@ -0,0 +1,29 @@ +From: Julia Lawall +Date: Sat, 21 May 2022 13:11:08 +0200 +Subject: scsi: smartpqi: Fix typo in comment +Patch-mainline: v5.19-rc1 +Git-commit: 8946ea283808d0905b11d12649976a227762d7e7 +References: jsc#PED-1557 + +Spelling mistake (triple letters) in comment. Detected with the help of +Coccinelle. + +Link: https://lore.kernel.org/r/20220521111145.81697-58-Julia.Lawall@inria.fr +Signed-off-by: Julia Lawall +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/smartpqi/smartpqi.h ++++ b/drivers/scsi/smartpqi/smartpqi.h +@@ -1082,7 +1082,7 @@ struct pqi_stream_data { + }; + + struct pqi_scsi_dev { +- int devtype; /* as reported by INQUIRY commmand */ ++ int devtype; /* as reported by INQUIRY command */ + u8 device_type; /* as reported by */ + /* BMIC_IDENTIFY_PHYSICAL_DEVICE */ + /* only valid for devtype = TYPE_DISK */ diff --git a/patches.suse/scsi-smartpqi-Fix-unused-variable-pqi_pm_ops-for-cla.patch b/patches.suse/scsi-smartpqi-Fix-unused-variable-pqi_pm_ops-for-cla.patch new file mode 100644 index 0000000..8a76490 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Fix-unused-variable-pqi_pm_ops-for-cla.patch @@ -0,0 +1,48 @@ +From: Don Brace +Date: Thu, 10 Feb 2022 14:11:51 -0600 +Subject: scsi: smartpqi: Fix unused variable pqi_pm_ops for clang +Patch-mainline: v5.18-rc1 +Git-commit: 31b17c3aeb5e9413ed627626f6213b3e53b20c8e +References: jsc#PED-1557 + +Driver added a new dev_pm_ops structure used only if CONFIG_PM is set. The +CONFIG_PM MACRO needed to be moved up in the code to avoid the compiler +warnings. The hunk to move the location was missing from the above patch. + +Found by kernel test robot by building driver with CONFIG_PM disabled. + +Link: https://lore.kernel.org/all/202202090657.bstNLuce-lkp@intel.com/ +Link: https://lore.kernel.org/r/20220210201151.236170-1-don.brace@microchip.com +Fixes: c66e078ad89e ("scsi: smartpqi: Fix hibernate and suspend") +Reported-by: kernel test robot +Reviewed-by: Scott Teel +Reviewed-by: Scott Benesh +Reviewed-by: Mike Mcgowen +Reviewed-by: Kevin Barnett +Signed-off-by: Don Brace +Signed-off-by: Martin K. 
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -8951,6 +8951,8 @@ static void pqi_process_module_params(vo + pqi_process_lockup_action_param(); + } + ++#if defined(CONFIG_PM) ++ + static inline enum bmic_flush_cache_shutdown_event pqi_get_flush_cache_shutdown_event(struct pci_dev *pci_dev) + { + if (pci_dev->subsystem_vendor == PCI_VENDOR_ID_ADAPTEC2 && pci_dev->subsystem_device == 0x1304) +@@ -9073,6 +9075,8 @@ static const struct dev_pm_ops pqi_pm_op + .restore = pqi_resume_or_restore, + }; + ++#endif /* CONFIG_PM */ ++ + /* Define the PCI IDs for the controllers that we support. */ + static const struct pci_device_id pqi_pci_id_table[] = { + { diff --git a/patches.suse/scsi-smartpqi-Quickly-propagate-path-failures-to-SCS.patch b/patches.suse/scsi-smartpqi-Quickly-propagate-path-failures-to-SCS.patch new file mode 100644 index 0000000..8049aff --- /dev/null +++ b/patches.suse/scsi-smartpqi-Quickly-propagate-path-failures-to-SCS.patch @@ -0,0 +1,75 @@ +From: Murthy Bhat +Date: Tue, 1 Feb 2022 15:48:13 -0600 +Subject: scsi: smartpqi: Quickly propagate path failures to SCSI midlayer +Patch-mainline: v5.18-rc1 +Git-commit: 94a68c814328836d022d1e7ced1b762834917bd2 +References: jsc#PED-1557 + +Return DID_NO_CONNECT when a path failure is detected. + +When a path fails during I/O and AIO path gets disabled for a multipath +device, the I/O was retried in the RAID path slowing down path fail +detection. Returning DID_NO_CONNECT allows multipath to switch paths more +quickly. + +Link: https://lore.kernel.org/r/164375209313.440833.9992416628621839233.stgit@brunhilda.pdev.net +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Reviewed-by: Sagar Biradar +Signed-off-by: Murthy Bhat +Signed-off-by: Don Brace +Signed-off-by: Martin K. 
Petersen
+Acked-by: Martin Wilck
+---
+ drivers/scsi/smartpqi/smartpqi_init.c | 20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+--- a/drivers/scsi/smartpqi/smartpqi_init.c
++++ b/drivers/scsi/smartpqi/smartpqi_init.c
+@@ -2291,6 +2291,14 @@ static inline bool pqi_is_device_with_sa
+ 	return false;
+ }
+
++static inline bool pqi_is_multipath_device(struct pqi_scsi_dev *device)
++{
++	if (pqi_is_logical_device(device))
++		return false;
++
++	return (device->path_map & (device->path_map - 1)) != 0;
++}
++
+ static inline bool pqi_expose_device(struct pqi_scsi_dev *device)
+ {
+ 	return !device->is_physical_device || !pqi_skip_device(device->scsi3addr);
+@@ -3216,12 +3224,14 @@ static void pqi_process_aio_io_error(str
+ 	int residual_count;
+ 	int xfer_count;
+ 	bool device_offline;
++	struct pqi_scsi_dev *device;
+
+ 	scmd = io_request->scmd;
+ 	error_info = io_request->error_info;
+ 	host_byte = DID_OK;
+ 	sense_data_length = 0;
+ 	device_offline = false;
++	device = scmd->device->hostdata;
+
+ 	switch (error_info->service_response) {
+ 	case PQI_AIO_SERV_RESPONSE_COMPLETE:
+@@ -3246,8 +3256,14 @@ static void pqi_process_aio_io_error(str
+ 		break;
+ 	case PQI_AIO_STATUS_AIO_PATH_DISABLED:
+ 		pqi_aio_path_disabled(io_request);
+-		scsi_status = SAM_STAT_GOOD;
+-		io_request->status = -EAGAIN;
++		if (pqi_is_multipath_device(device)) {
++			pqi_device_remove_start(device);
++			host_byte = DID_NO_CONNECT;
++			scsi_status = SAM_STAT_CHECK_CONDITION;
++		} else {
++			scsi_status = SAM_STAT_GOOD;
++			io_request->status = -EAGAIN;
++		}
+ 		break;
+ 	case PQI_AIO_STATUS_NO_PATH_TO_DEVICE:
+ 	case PQI_AIO_STATUS_INVALID_DEVICE:
diff --git a/patches.suse/scsi-smartpqi-Resolve-delay-issue-with-PQI_HZ-value.patch b/patches.suse/scsi-smartpqi-Resolve-delay-issue-with-PQI_HZ-value.patch
new file mode 100644
index 0000000..9f4da73
--- /dev/null
+++ b/patches.suse/scsi-smartpqi-Resolve-delay-issue-with-PQI_HZ-value.patch
@@ -0,0 +1,232 @@
+From: Balsundar P
+Date: Tue, 1 Feb 2022 15:48:28 -0600
+Subject: scsi: smartpqi: Resolve delay issue with PQI_HZ value
+Patch-mainline: v5.18-rc1
+Git-commit: 42dc0426fbbbe380c83976e7601f23de0034249d
+References: jsc#PED-1557
+
+Change PQI_HZ to HZ.
+
+The PQI_HZ macro was set to 1000 whenever the HZ value was less than 1000.
+On a kernel with HZ = 100, this turns an intended 10-second delay into a
+much longer one: when firmware raises an event, the rescan worker is
+scheduled after a delay of (10 x PQI_HZ) = 10000 jiffies = 100 seconds
+instead of 10 seconds.
+
+The driver also uses PQI_HZ in many other places, which might cause
+similar delay issues.
+
+Link: https://lore.kernel.org/r/164375210825.440833.15510172447583227486.stgit@brunhilda.pdev.net
+Reviewed-by: Kevin Barnett
+Reviewed-by: Mike McGowen
+Reviewed-by: Scott Benesh
+Reviewed-by: Scott Teel
+Signed-off-by: Balsundar P
+Signed-off-by: Don Brace
+Signed-off-by: Martin K.
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi.h | 6 ------ + drivers/scsi/smartpqi/smartpqi_init.c | 32 ++++++++++++++++---------------- + drivers/scsi/smartpqi/smartpqi_sis.c | 8 ++++---- + 3 files changed, 20 insertions(+), 26 deletions(-) + +--- a/drivers/scsi/smartpqi/smartpqi.h ++++ b/drivers/scsi/smartpqi/smartpqi.h +@@ -96,12 +96,6 @@ struct pqi_ctrl_registers { + struct pqi_device_registers pqi_registers; /* 4000h */ + }; + +-#if ((HZ) < 1000) +-#define PQI_HZ 1000 +-#else +-#define PQI_HZ (HZ) +-#endif +- + #define PQI_DEVICE_REGISTERS_OFFSET 0x4000 + + /* shutdown reasons for taking the controller offline */ +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -376,7 +376,7 @@ static inline void pqi_ctrl_wait_until_q + + displayed_warning = false; + start_jiffies = jiffies; +- warning_timeout = (PQI_QUIESCE_WARNING_TIMEOUT_SECS * PQI_HZ) + start_jiffies; ++ warning_timeout = (PQI_QUIESCE_WARNING_TIMEOUT_SECS * HZ) + start_jiffies; + + while (atomic_read(&ctrl_info->num_busy_threads) > + atomic_read(&ctrl_info->num_blocked_threads)) { +@@ -385,7 +385,7 @@ static inline void pqi_ctrl_wait_until_q + "waiting %u seconds for driver activity to quiesce\n", + jiffies_to_msecs(jiffies - start_jiffies) / 1000); + displayed_warning = true; +- warning_timeout = (PQI_QUIESCE_WARNING_TIMEOUT_SECS * PQI_HZ) + jiffies; ++ warning_timeout = (PQI_QUIESCE_WARNING_TIMEOUT_SECS * HZ) + jiffies; + } + usleep_range(1000, 2000); + } +@@ -462,7 +462,7 @@ static inline void pqi_schedule_rescan_w + pqi_schedule_rescan_worker_with_delay(ctrl_info, 0); + } + +-#define PQI_RESCAN_WORK_DELAY (10 * PQI_HZ) ++#define PQI_RESCAN_WORK_DELAY (10 * HZ) + + static inline void pqi_schedule_rescan_worker_delayed(struct pqi_ctrl_info *ctrl_info) + { +@@ -1038,7 +1038,7 @@ static int pqi_write_current_time_to_hos + return rc; + } + +-#define PQI_UPDATE_TIME_WORK_INTERVAL (24UL * 60 * 60 * PQI_HZ) ++#define PQI_UPDATE_TIME_WORK_INTERVAL (24UL * 60 * 60 * HZ) + + static void pqi_update_time_worker(struct work_struct *work) + { +@@ -3045,7 +3045,7 @@ static int pqi_wait_for_pqi_mode_ready(s + u8 status; + + pqi_registers = ctrl_info->pqi_registers; +- timeout = (PQI_MODE_READY_TIMEOUT_SECS * PQI_HZ) + jiffies; ++ timeout = (PQI_MODE_READY_TIMEOUT_SECS * HZ) + jiffies; + + while (1) { + signature = readq(&pqi_registers->signature); +@@ -3539,7 +3539,7 @@ static enum pqi_soft_reset_status pqi_po + u8 status; + unsigned long timeout; + +- timeout = (PQI_SOFT_RESET_STATUS_TIMEOUT_SECS * PQI_HZ) + jiffies; ++ timeout = (PQI_SOFT_RESET_STATUS_TIMEOUT_SECS * HZ) + jiffies; + + while (1) { + status = pqi_read_soft_reset_status(ctrl_info); +@@ -3717,7 +3717,7 @@ out: + pqi_ctrl_unbusy(ctrl_info); + } + +-#define PQI_HEARTBEAT_TIMER_INTERVAL (10 * PQI_HZ) ++#define PQI_HEARTBEAT_TIMER_INTERVAL (10 * HZ) + + static void pqi_heartbeat_timer_handler(struct timer_list *t) + { +@@ -4264,7 +4264,7 @@ static int pqi_alloc_admin_queues(struct + return 0; + } + +-#define PQI_ADMIN_QUEUE_CREATE_TIMEOUT_JIFFIES PQI_HZ ++#define PQI_ADMIN_QUEUE_CREATE_TIMEOUT_JIFFIES HZ + #define PQI_ADMIN_QUEUE_CREATE_POLL_INTERVAL_MSECS 1 + + static int pqi_create_admin_queues(struct pqi_ctrl_info *ctrl_info) +@@ -4358,7 +4358,7 @@ static int pqi_poll_for_admin_response(s + admin_queues = &ctrl_info->admin_queues; + oq_ci = admin_queues->oq_ci_copy; + +- timeout = (PQI_ADMIN_REQUEST_TIMEOUT_SECS * PQI_HZ) + jiffies; ++ timeout = (PQI_ADMIN_REQUEST_TIMEOUT_SECS * HZ) + jiffies; + + while (1) { + 
oq_pi = readl(admin_queues->oq_pi); +@@ -4473,7 +4473,7 @@ static int pqi_wait_for_completion_io(st + + while (1) { + if (wait_for_completion_io_timeout(wait, +- PQI_WAIT_FOR_COMPLETION_IO_TIMEOUT_SECS * PQI_HZ)) { ++ PQI_WAIT_FOR_COMPLETION_IO_TIMEOUT_SECS * HZ)) { + rc = 0; + break; + } +@@ -6065,7 +6065,7 @@ static int pqi_wait_until_inbound_queues + + displayed_warning = false; + start_jiffies = jiffies; +- warning_timeout = (PQI_INBOUND_QUEUES_NONEMPTY_WARNING_TIMEOUT_SECS * PQI_HZ) + start_jiffies; ++ warning_timeout = (PQI_INBOUND_QUEUES_NONEMPTY_WARNING_TIMEOUT_SECS * HZ) + start_jiffies; + + while (1) { + queued_io_count = pqi_queued_io_count(ctrl_info); +@@ -6080,7 +6080,7 @@ static int pqi_wait_until_inbound_queues + "waiting %u seconds for queued I/O to drain (queued I/O count: %u; non-empty inbound queue count: %u)\n", + jiffies_to_msecs(jiffies - start_jiffies) / 1000, queued_io_count, nonempty_inbound_queue_count); + displayed_warning = true; +- warning_timeout = (PQI_INBOUND_QUEUES_NONEMPTY_WARNING_TIMEOUT_SECS * PQI_HZ) + jiffies; ++ warning_timeout = (PQI_INBOUND_QUEUES_NONEMPTY_WARNING_TIMEOUT_SECS * HZ) + jiffies; + } + usleep_range(1000, 2000); + } +@@ -6148,7 +6148,7 @@ static int pqi_device_wait_for_pending_i + unsigned long msecs_waiting; + + start_jiffies = jiffies; +- warning_timeout = (PQI_PENDING_IO_WARNING_TIMEOUT_SECS * PQI_HZ) + start_jiffies; ++ warning_timeout = (PQI_PENDING_IO_WARNING_TIMEOUT_SECS * HZ) + start_jiffies; + + while ((cmds_outstanding = atomic_read(&device->scsi_cmds_outstanding)) > 0) { + pqi_check_ctrl_health(ctrl_info); +@@ -6167,7 +6167,7 @@ static int pqi_device_wait_for_pending_i + "scsi %d:%d:%d:%d: waiting %lu seconds for %d outstanding command(s)\n", + ctrl_info->scsi_host->host_no, device->bus, device->target, + device->lun, msecs_waiting / 1000, cmds_outstanding); +- warning_timeout = (PQI_PENDING_IO_WARNING_TIMEOUT_SECS * PQI_HZ) + jiffies; ++ warning_timeout = (PQI_PENDING_IO_WARNING_TIMEOUT_SECS * HZ) + jiffies; + } + usleep_range(1000, 2000); + } +@@ -6196,7 +6196,7 @@ static int pqi_wait_for_lun_reset_comple + + while (1) { + if (wait_for_completion_io_timeout(wait, +- PQI_LUN_RESET_POLL_COMPLETION_SECS * PQI_HZ)) { ++ PQI_LUN_RESET_POLL_COMPLETION_SECS * HZ)) { + rc = 0; + break; + } +@@ -7994,7 +7994,7 @@ static int pqi_ctrl_init(struct pqi_ctrl + return rc; + } + sis_soft_reset(ctrl_info); +- msleep(PQI_POST_RESET_DELAY_SECS * PQI_HZ); ++ ssleep(PQI_POST_RESET_DELAY_SECS); + } else { + rc = pqi_force_sis_mode(ctrl_info); + if (rc) +--- a/drivers/scsi/smartpqi/smartpqi_sis.c ++++ b/drivers/scsi/smartpqi/smartpqi_sis.c +@@ -92,7 +92,7 @@ static int sis_wait_for_ctrl_ready_with_ + unsigned long timeout; + u32 status; + +- timeout = (timeout_secs * PQI_HZ) + jiffies; ++ timeout = (timeout_secs * HZ) + jiffies; + + while (1) { + status = readl(&ctrl_info->registers->sis_firmware_status); +@@ -209,7 +209,7 @@ static int sis_send_sync_cmd(struct pqi_ + * the top of the loop in order to give the controller time to start + * processing the command before we start polling. 
+ */ +- timeout = (SIS_CMD_COMPLETE_TIMEOUT_SECS * PQI_HZ) + jiffies; ++ timeout = (SIS_CMD_COMPLETE_TIMEOUT_SECS * HZ) + jiffies; + while (1) { + msleep(SIS_CMD_COMPLETE_POLL_INTERVAL_MSECS); + doorbell = readl(®isters->sis_ctrl_to_host_doorbell); +@@ -355,7 +355,7 @@ static int sis_wait_for_doorbell_bit_to_ + u32 doorbell_register; + unsigned long timeout; + +- timeout = (SIS_DOORBELL_BIT_CLEAR_TIMEOUT_SECS * PQI_HZ) + jiffies; ++ timeout = (SIS_DOORBELL_BIT_CLEAR_TIMEOUT_SECS * HZ) + jiffies; + + while (1) { + doorbell_register = +@@ -452,7 +452,7 @@ int sis_wait_for_fw_triage_completion(st + enum sis_fw_triage_status status; + unsigned long timeout; + +- timeout = (SIS_FW_TRIAGE_STATUS_TIMEOUT_SECS * PQI_HZ) + jiffies; ++ timeout = (SIS_FW_TRIAGE_STATUS_TIMEOUT_SECS * HZ) + jiffies; + while (1) { + status = sis_read_firmware_triage_status(ctrl_info); + if (status == FW_TRIAGE_COND_INVALID) { diff --git a/patches.suse/scsi-smartpqi-Speed-up-RAID-10-sequential-reads.patch b/patches.suse/scsi-smartpqi-Speed-up-RAID-10-sequential-reads.patch new file mode 100644 index 0000000..a02e66f --- /dev/null +++ b/patches.suse/scsi-smartpqi-Speed-up-RAID-10-sequential-reads.patch @@ -0,0 +1,77 @@ +From: Mike McGowen +Date: Tue, 1 Feb 2022 15:48:48 -0600 +Subject: scsi: smartpqi: Speed up RAID 10 sequential reads +Patch-mainline: v5.18-rc1 +Git-commit: 5d8fbce04d36dfd837d655e3d1b66e44b8fafbe5 +References: jsc#PED-1557 + +Use all data disks for sequential read operations. + +Testing discovered inconsistent performance on RAID 10 volumes when +performing 256K sequential reads. The driver was only using a single +tracker to determine which physical drive to send a request to for AIO +requests. + +Change the single tracker (next_bypass_group) to an array of trackers based +on the number of data disks in a row of the RAID map. + +Link: https://lore.kernel.org/r/164375212842.440833.6733971458765002128.stgit@brunhilda.pdev.net +Reviewed-by: Kevin Barnett +Reviewed-by: Mike McGowen +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Signed-off-by: Mike McGowen +Signed-off-by: Don Brace +Signed-off-by: Martin K. 
Petersen
+Acked-by: Martin Wilck
+---
+ drivers/scsi/smartpqi/smartpqi.h | 5 +++--
+ drivers/scsi/smartpqi/smartpqi_init.c | 6 +++---
+ 2 files changed, 6 insertions(+), 5 deletions(-)
+
+--- a/drivers/scsi/smartpqi/smartpqi.h
++++ b/drivers/scsi/smartpqi/smartpqi.h
+@@ -918,7 +918,8 @@ union pqi_reset_register {
+ #define PQI_MAX_TRANSFER_SIZE (1024U * 1024U)
+ #define PQI_MAX_TRANSFER_SIZE_KDUMP (512 * 1024U)
+
+-#define RAID_MAP_MAX_ENTRIES 1024
++#define RAID_MAP_MAX_ENTRIES 1024
++#define RAID_MAP_MAX_DATA_DISKS_PER_ROW 128
+
+ #define PQI_PHYSICAL_DEVICE_BUS 0
+ #define PQI_RAID_VOLUME_BUS 1
+@@ -1125,7 +1126,7 @@ struct pqi_scsi_dev {
+ 	u8 ncq_prio_support;
+ 	bool raid_bypass_configured; /* RAID bypass configured */
+ 	bool raid_bypass_enabled; /* RAID bypass enabled */
+-	u32 next_bypass_group;
++	u32 next_bypass_group[RAID_MAP_MAX_DATA_DISKS_PER_ROW];
+ 	struct raid_map *raid_map; /* RAID bypass map */
+ 	u32 max_transfer_encrypted;
+
+--- a/drivers/scsi/smartpqi/smartpqi_init.c
++++ b/drivers/scsi/smartpqi/smartpqi_init.c
+@@ -2058,7 +2058,7 @@ static void pqi_scsi_update_device(struc
+ 		sizeof(existing_device->box));
+ 	memcpy(existing_device->phys_connector, new_device->phys_connector,
+ 		sizeof(existing_device->phys_connector));
+-	existing_device->next_bypass_group = 0;
++	memset(existing_device->next_bypass_group, 0, sizeof(existing_device->next_bypass_group));
+ 	kfree(existing_device->raid_map);
+ 	existing_device->raid_map = new_device->raid_map;
+ 	existing_device->raid_bypass_configured =
+@@ -2963,11 +2963,11 @@ static int pqi_raid_bypass_submit_scsi_c
+ 		if (rmd.is_write) {
+ 			pqi_calc_aio_r1_nexus(raid_map, &rmd);
+ 		} else {
+-			group = device->next_bypass_group;
++			group = device->next_bypass_group[rmd.map_index];
+ 			next_bypass_group = group + 1;
+ 			if (next_bypass_group >= rmd.layout_map_count)
+ 				next_bypass_group = 0;
+-			device->next_bypass_group = next_bypass_group;
++			device->next_bypass_group[rmd.map_index] = next_bypass_group;
+ 			rmd.map_index += group * rmd.data_disks_per_row;
+ 		}
+ 	} else if ((device->raid_level == SA_RAID_5 ||
diff --git a/patches.suse/scsi-smartpqi-Stop-logging-spurious-PQI-reset-failur.patch b/patches.suse/scsi-smartpqi-Stop-logging-spurious-PQI-reset-failur.patch
new file mode 100644
index 0000000..86e00ce
--- /dev/null
+++ b/patches.suse/scsi-smartpqi-Stop-logging-spurious-PQI-reset-failur.patch
@@ -0,0 +1,39 @@
+From: Kevin Barnett
+Date: Fri, 8 Jul 2022 13:47:31 -0500
+Subject: scsi: smartpqi: Stop logging spurious PQI reset failures
+Patch-mainline: v6.0-rc1
+Git-commit: 85b41834b0f43b3e5c079ea9ea9288b2568b8c60
+References: jsc#PED-1557
+
+Change the method used to detect a controller firmware crash during PQI
+reset.
+
+PQI reset can fail with error -6 if the firmware takes > 100ms to complete
+the reset.
+
+The method used by the driver to detect a controller firmware crash during
+PQI reset was incorrect in some cases.
+
+Link: https://lore.kernel.org/r/165730605108.177165.1132931838384767071.stgit@brunhilda
+Reviewed-by: Scott Benesh
+Reviewed-by: Scott Teel
+Reviewed-by: Mike McGowen
+Signed-off-by: Kevin Barnett
+Signed-off-by: Don Brace
+Signed-off-by: Martin K.
Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -7412,8 +7412,7 @@ static int pqi_wait_for_pqi_reset_comple + reset_reg.all_bits = readl(&pqi_registers->device_reset); + if (reset_reg.bits.reset_action == PQI_RESET_ACTION_COMPLETED) + break; +- pqi_check_ctrl_health(ctrl_info); +- if (pqi_ctrl_offline(ctrl_info)) { ++ if (!sis_is_firmware_running(ctrl_info)) { + rc = -ENXIO; + break; + } diff --git a/patches.suse/scsi-smartpqi-Stop-using-the-SCSI-pointer.patch b/patches.suse/scsi-smartpqi-Stop-using-the-SCSI-pointer.patch new file mode 100644 index 0000000..4372669 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Stop-using-the-SCSI-pointer.patch @@ -0,0 +1,66 @@ +From: Bart Van Assche +Date: Fri, 18 Feb 2022 11:51:11 -0800 +Subject: scsi: smartpqi: Stop using the SCSI pointer +Patch-mainline: v5.18-rc1 +Git-commit: c1ea387d998ab524291f1b78f8faa4618decd36d +References: jsc#PED-1557 + +Set .cmd_size in the SCSI host template instead of using the SCSI pointer +from struct scsi_cmnd. This patch prepares for removal of the SCSI pointer +from struct scsi_cmnd. + +Link: https://lore.kernel.org/r/20220218195117.25689-44-bvanassche@acm.org +Reviewed-by: Johannes Thumshirn +Reviewed-by: Hannes Reinecke +Reviewed-by: Himanshu Madhani +Signed-off-by: Bart Van Assche +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -54,6 +54,15 @@ MODULE_DESCRIPTION("Driver for Microchip + MODULE_VERSION(DRIVER_VERSION); + MODULE_LICENSE("GPL"); + ++struct pqi_cmd_priv { ++ int this_residual; ++}; ++ ++static struct pqi_cmd_priv *pqi_cmd_priv(struct scsi_cmnd *cmd) ++{ ++ return scsi_cmd_priv(cmd); ++} ++ + static void pqi_verify_structures(void); + static void pqi_take_ctrl_offline(struct pqi_ctrl_info *ctrl_info, + enum pqi_ctrl_shutdown_reason ctrl_shutdown_reason); +@@ -5552,7 +5561,7 @@ static void pqi_aio_io_complete(struct p + scsi_dma_unmap(scmd); + if (io_request->status == -EAGAIN || pqi_raid_bypass_retry_needed(io_request)) { + set_host_byte(scmd, DID_IMM_RETRY); +- scmd->SCp.this_residual++; ++ pqi_cmd_priv(scmd)->this_residual++; + } + + pqi_free_io_request(io_request); +@@ -5814,7 +5823,7 @@ static inline bool pqi_is_bypass_eligibl + if (blk_rq_is_passthrough(scsi_cmd_to_rq(scmd))) + return false; + +- return scmd->SCp.this_residual == 0; ++ return pqi_cmd_priv(scmd)->this_residual == 0; + } + + /* +@@ -7262,6 +7271,7 @@ static struct scsi_host_template pqi_dri + .map_queues = pqi_map_queues, + .sdev_groups = pqi_sdev_groups, + .shost_groups = pqi_shost_groups, ++ .cmd_size = sizeof(struct pqi_cmd_priv), + }; + + static int pqi_register_scsi(struct pqi_ctrl_info *ctrl_info) diff --git a/patches.suse/scsi-smartpqi-Update-copyright-to-current-year.patch b/patches.suse/scsi-smartpqi-Update-copyright-to-current-year.patch new file mode 100644 index 0000000..3929ff3 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Update-copyright-to-current-year.patch @@ -0,0 +1,91 @@ +From: Don Brace +Date: Fri, 8 Jul 2022 13:48:01 -0500 +Subject: scsi: smartpqi: Update copyright to current year +Patch-mainline: v6.0-rc1 +Git-commit: e4b73b3fa2b98187c9cbb1364d6849ca4b7d6c25 +References: jsc#PED-1557 + +Update copyright to current 
year. + +Link: https://lore.kernel.org/r/165730608177.177165.13184715486635363193.stgit@brunhilda +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Reviewed-by: Kevin Barnett +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/Kconfig | 2 +- + drivers/scsi/smartpqi/smartpqi.h | 2 +- + drivers/scsi/smartpqi/smartpqi_init.c | 2 +- + drivers/scsi/smartpqi/smartpqi_sas_transport.c | 2 +- + drivers/scsi/smartpqi/smartpqi_sis.c | 2 +- + drivers/scsi/smartpqi/smartpqi_sis.h | 2 +- + 6 files changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/scsi/smartpqi/Kconfig ++++ b/drivers/scsi/smartpqi/Kconfig +@@ -1,7 +1,7 @@ + # + # Kernel configuration file for the SMARTPQI + # +-# Copyright (c) 2019-2021 Microchip Technology Inc. and its subsidiaries ++# Copyright (c) 2019-2022 Microchip Technology Inc. and its subsidiaries + # Copyright (c) 2017-2018 Microsemi Corporation + # Copyright (c) 2016 Microsemi Corporation + # Copyright (c) 2016 PMC-Sierra, Inc. +--- a/drivers/scsi/smartpqi/smartpqi.h ++++ b/drivers/scsi/smartpqi/smartpqi.h +@@ -1,7 +1,7 @@ + /* SPDX-License-Identifier: GPL-2.0 */ + /* + * driver for Microchip PQI-based storage controllers +- * Copyright (c) 2019-2021 Microchip Technology Inc. and its subsidiaries ++ * Copyright (c) 2019-2022 Microchip Technology Inc. and its subsidiaries + * Copyright (c) 2016-2018 Microsemi Corporation + * Copyright (c) 2016 PMC-Sierra, Inc. + * +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -1,7 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* + * driver for Microchip PQI-based storage controllers +- * Copyright (c) 2019-2021 Microchip Technology Inc. and its subsidiaries ++ * Copyright (c) 2019-2022 Microchip Technology Inc. and its subsidiaries + * Copyright (c) 2016-2018 Microsemi Corporation + * Copyright (c) 2016 PMC-Sierra, Inc. + * +--- a/drivers/scsi/smartpqi/smartpqi_sas_transport.c ++++ b/drivers/scsi/smartpqi/smartpqi_sas_transport.c +@@ -1,7 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* + * driver for Microchip PQI-based storage controllers +- * Copyright (c) 2019-2021 Microchip Technology Inc. and its subsidiaries ++ * Copyright (c) 2019-2022 Microchip Technology Inc. and its subsidiaries + * Copyright (c) 2016-2018 Microsemi Corporation + * Copyright (c) 2016 PMC-Sierra, Inc. + * +--- a/drivers/scsi/smartpqi/smartpqi_sis.c ++++ b/drivers/scsi/smartpqi/smartpqi_sis.c +@@ -1,7 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* + * driver for Microchip PQI-based storage controllers +- * Copyright (c) 2019-2021 Microchip Technology Inc. and its subsidiaries ++ * Copyright (c) 2019-2022 Microchip Technology Inc. and its subsidiaries + * Copyright (c) 2016-2018 Microsemi Corporation + * Copyright (c) 2016 PMC-Sierra, Inc. + * +--- a/drivers/scsi/smartpqi/smartpqi_sis.h ++++ b/drivers/scsi/smartpqi/smartpqi_sis.h +@@ -1,7 +1,7 @@ + /* SPDX-License-Identifier: GPL-2.0 */ + /* + * driver for Microchip PQI-based storage controllers +- * Copyright (c) 2019-2021 Microchip Technology Inc. and its subsidiaries ++ * Copyright (c) 2019-2022 Microchip Technology Inc. and its subsidiaries + * Copyright (c) 2016-2018 Microsemi Corporation + * Copyright (c) 2016 PMC-Sierra, Inc. 
+ * diff --git a/patches.suse/scsi-smartpqi-Update-deleting-a-LUN-via-sysfs.patch b/patches.suse/scsi-smartpqi-Update-deleting-a-LUN-via-sysfs.patch new file mode 100644 index 0000000..a3851f3 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Update-deleting-a-LUN-via-sysfs.patch @@ -0,0 +1,104 @@ +From: Kevin Barnett +Date: Fri, 8 Jul 2022 13:47:51 -0500 +Subject: scsi: smartpqi: Update deleting a LUN via sysfs +Patch-mainline: v6.0-rc1 +Git-commit: 2d80f4054f7f901b8ad97358a9069616ac8524c7 +References: jsc#PED-1557 + +Change removing a LUN using sysfs from an internal driver function +pqi_remove_all_scsi_devices() to using the .slave_destroy entry in the +scsi_host_template. + +A LUN can be deleted via sysfs using this syntax: + +echo 1 > /sys/block/sdX/device/delete + +Link: https://lore.kernel.org/r/165730607154.177165.9723066932202995774.stgit@brunhilda +Reviewed-by: Scott Teel +Reviewed-by: Mike McGowen +Signed-off-by: Kevin Barnett +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 48 +++++++++++++++++++++------------- + 1 file changed, 30 insertions(+), 18 deletions(-) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -2536,23 +2536,6 @@ out: + return rc; + } + +-static void pqi_remove_all_scsi_devices(struct pqi_ctrl_info *ctrl_info) +-{ +- unsigned long flags; +- struct pqi_scsi_dev *device; +- struct pqi_scsi_dev *next; +- +- list_for_each_entry_safe(device, next, &ctrl_info->scsi_device_list, +- scsi_device_list_entry) { +- if (pqi_is_device_added(device)) +- pqi_remove_device(ctrl_info, device); +- spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); +- list_del(&device->scsi_device_list_entry); +- pqi_free_device(device); +- spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); +- } +-} +- + static int pqi_scan_scsi_devices(struct pqi_ctrl_info *ctrl_info) + { + int rc; +@@ -6476,6 +6459,35 @@ static int pqi_slave_configure(struct sc + return rc; + } + ++static void pqi_slave_destroy(struct scsi_device *sdev) ++{ ++ struct pqi_ctrl_info *ctrl_info; ++ struct pqi_scsi_dev *device; ++ int mutex_acquired; ++ unsigned long flags; ++ ++ ctrl_info = shost_to_hba(sdev->host); ++ ++ mutex_acquired = mutex_trylock(&ctrl_info->scan_mutex); ++ if (!mutex_acquired) ++ return; ++ ++ device = sdev->hostdata; ++ if (!device) { ++ mutex_unlock(&ctrl_info->scan_mutex); ++ return; ++ } ++ ++ spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags); ++ list_del(&device->scsi_device_list_entry); ++ spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags); ++ ++ mutex_unlock(&ctrl_info->scan_mutex); ++ ++ pqi_dev_info(ctrl_info, "removed", device); ++ pqi_free_device(device); ++} ++ + static int pqi_getpciinfo_ioctl(struct pqi_ctrl_info *ctrl_info, void __user *arg) + { + struct pci_dev *pci_dev; +@@ -7363,6 +7375,7 @@ static struct scsi_host_template pqi_dri + .ioctl = pqi_ioctl, + .slave_alloc = pqi_slave_alloc, + .slave_configure = pqi_slave_configure, ++ .slave_destroy = pqi_slave_destroy, + .map_queues = pqi_map_queues, + .sdev_groups = pqi_sdev_groups, + .shost_groups = pqi_shost_groups, +@@ -8649,7 +8662,6 @@ static void pqi_remove_ctrl(struct pqi_c + pqi_fail_all_outstanding_requests(ctrl_info); + ctrl_info->pqi_mode_enabled = false; + } +- pqi_remove_all_scsi_devices(ctrl_info); + pqi_unregister_scsi(ctrl_info); + if (ctrl_info->pqi_mode_enabled) + pqi_revert_to_sis_mode(ctrl_info); diff --git 
a/patches.suse/scsi-smartpqi-Update-version-to-2.1.14-035.patch b/patches.suse/scsi-smartpqi-Update-version-to-2.1.14-035.patch new file mode 100644 index 0000000..513bbac --- /dev/null +++ b/patches.suse/scsi-smartpqi-Update-version-to-2.1.14-035.patch @@ -0,0 +1,36 @@ +From: Don Brace +Date: Tue, 1 Feb 2022 15:49:18 -0600 +Subject: scsi: smartpqi: Update version to 2.1.14-035 +Patch-mainline: v5.18-rc1 +Git-commit: 62ed6622aaf0ba3c41cc6db6f901cbaa2a7378d1 +References: jsc#PED-1557 + +Link: https://lore.kernel.org/r/164375215867.440833.17567317655622946368.stgit@brunhilda.pdev.net +Reviewed-by: Kevin Barnett +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Reviewed-by: Gerry Morong +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -33,11 +33,11 @@ + #define BUILD_TIMESTAMP + #endif + +-#define DRIVER_VERSION "2.1.12-055" ++#define DRIVER_VERSION "2.1.14-035" + #define DRIVER_MAJOR 2 + #define DRIVER_MINOR 1 +-#define DRIVER_RELEASE 12 +-#define DRIVER_REVISION 55 ++#define DRIVER_RELEASE 14 ++#define DRIVER_REVISION 35 + + #define DRIVER_NAME "Microchip SmartPQI Driver (v" \ + DRIVER_VERSION BUILD_TIMESTAMP ")" diff --git a/patches.suse/scsi-smartpqi-Update-version-to-2.1.18-045.patch b/patches.suse/scsi-smartpqi-Update-version-to-2.1.18-045.patch new file mode 100644 index 0000000..0fa24b6 --- /dev/null +++ b/patches.suse/scsi-smartpqi-Update-version-to-2.1.18-045.patch @@ -0,0 +1,36 @@ +From: Don Brace +Date: Fri, 8 Jul 2022 13:48:06 -0500 +Subject: scsi: smartpqi: Update version to 2.1.18-045 +Patch-mainline: v6.0-rc1 +Git-commit: f54f85dfd757301791be8ce6fccc6f6604d82b40 +References: jsc#PED-1557 + +Link: https://lore.kernel.org/r/165730608687.177165.11815510982277242966.stgit@brunhilda +Reviewed-by: Gerry Morong +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Reviewed-by: Kevin Barnett +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi_init.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -33,11 +33,11 @@ + #define BUILD_TIMESTAMP + #endif + +-#define DRIVER_VERSION "2.1.14-035" ++#define DRIVER_VERSION "2.1.18-045" + #define DRIVER_MAJOR 2 + #define DRIVER_MINOR 1 +-#define DRIVER_RELEASE 14 +-#define DRIVER_REVISION 35 ++#define DRIVER_RELEASE 18 ++#define DRIVER_REVISION 45 + + #define DRIVER_NAME "Microchip SmartPQI Driver (v" \ + DRIVER_VERSION BUILD_TIMESTAMP ")" diff --git a/patches.suse/scsi-smartpqi-Update-volume-size-after-expansion.patch b/patches.suse/scsi-smartpqi-Update-volume-size-after-expansion.patch new file mode 100644 index 0000000..9aef00b --- /dev/null +++ b/patches.suse/scsi-smartpqi-Update-volume-size-after-expansion.patch @@ -0,0 +1,106 @@ +From: Mahesh Rajashekhara +Date: Tue, 1 Feb 2022 15:48:38 -0600 +Subject: scsi: smartpqi: Update volume size after expansion +Patch-mainline: v5.18-rc1 +Git-commit: 27655e9db47965f640b3ef5a6796587d58b523eb +References: jsc#PED-1557 + +After modifying logical volume size, lsblk command still shows previous +size of logical volume. + +When the driver gets any event from firmware it schedules rescan worker +with delay of 10 seconds. 
If array expansion is so quick that it gets +completed in a second, the driver could not catch logical volume expansion +due to worker delay. + +Since driver does not detect volume expansion, driver would not call +rescan device to update new size to the OS. + +Link: https://lore.kernel.org/r/164375211833.440833.17023155389220583731.stgit@brunhilda.pdev.net +Reviewed-by: Kevin Barnett +Reviewed-by: Mike McGowen +Reviewed-by: Scott Benesh +Reviewed-by: Scott Teel +Signed-off-by: Mahesh Rajashekhara +Signed-off-by: Don Brace +Signed-off-by: Martin K. Petersen +Acked-by: Martin Wilck +--- + drivers/scsi/smartpqi/smartpqi.h | 1 + + drivers/scsi/smartpqi/smartpqi_init.c | 20 ++++++++++++-------- + 2 files changed, 13 insertions(+), 8 deletions(-) + +--- a/drivers/scsi/smartpqi/smartpqi.h ++++ b/drivers/scsi/smartpqi/smartpqi.h +@@ -1322,6 +1322,7 @@ struct pqi_ctrl_info { + bool controller_online; + bool block_requests; + bool scan_blocked; ++ u8 logical_volume_rescan_needed : 1; + u8 inbound_spanning_supported : 1; + u8 outbound_spanning_supported : 1; + u8 pqi_mode_enabled : 1; +--- a/drivers/scsi/smartpqi/smartpqi_init.c ++++ b/drivers/scsi/smartpqi/smartpqi_init.c +@@ -2015,8 +2015,8 @@ static void pqi_dev_info(struct pqi_ctrl + + /* Assumes the SCSI device list lock is held. */ + +-static void pqi_scsi_update_device(struct pqi_scsi_dev *existing_device, +- struct pqi_scsi_dev *new_device) ++static void pqi_scsi_update_device(struct pqi_ctrl_info *ctrl_info, ++ struct pqi_scsi_dev *existing_device, struct pqi_scsi_dev *new_device) + { + existing_device->device_type = new_device->device_type; + existing_device->bus = new_device->bus; +@@ -2026,9 +2026,8 @@ static void pqi_scsi_update_device(struc + existing_device->target_lun_valid = true; + } + +- if ((existing_device->volume_status == CISS_LV_QUEUED_FOR_EXPANSION || +- existing_device->volume_status == CISS_LV_UNDERGOING_EXPANSION) && +- new_device->volume_status == CISS_LV_OK) ++ if (pqi_is_logical_device(existing_device) && ++ ctrl_info->logical_volume_rescan_needed) + existing_device->rescan = true; + + /* By definition, the scsi3addr and wwid fields are already the same. */ +@@ -2146,7 +2145,7 @@ static void pqi_update_device_list(struc + */ + device->new_device = false; + matching_device->device_gone = false; +- pqi_scsi_update_device(matching_device, device); ++ pqi_scsi_update_device(ctrl_info, matching_device, device); + break; + case DEVICE_NOT_FOUND: + /* +@@ -2218,8 +2217,8 @@ static void pqi_update_device_list(struc + } + + /* +- * Notify the SCSI ML if the queue depth of any existing device has +- * changed. ++ * Notify the SML of any existing device changes such as; ++ * queue depth, device size. 
+ */ + list_for_each_entry(device, &ctrl_info->scsi_device_list, scsi_device_list_entry) { + if (device->sdev && device->queue_depth != device->advertised_queue_depth) { +@@ -2248,6 +2247,9 @@ static void pqi_update_device_list(struc + } + } + } ++ ++ ctrl_info->logical_volume_rescan_needed = false; ++ + } + + static inline bool pqi_is_supported_device(struct pqi_scsi_dev *device) +@@ -3703,6 +3705,8 @@ static void pqi_event_worker(struct work + } else { + ack_event = true; + rescan_needed = true; ++ if (event->event_type == PQI_EVENT_TYPE_LOGICAL_DEVICE) ++ ctrl_info->logical_volume_rescan_needed = true; + } + if (ack_event) + pqi_acknowledge_event(ctrl_info, event); diff --git a/patches.suse/scsi-ufs-Fix-runtime-PM-messages-never-ending-cycle.patch b/patches.suse/scsi-ufs-Fix-runtime-PM-messages-never-ending-cycle.patch index edf17a6..34b16a8 100644 --- a/patches.suse/scsi-ufs-Fix-runtime-PM-messages-never-ending-cycle.patch +++ b/patches.suse/scsi-ufs-Fix-runtime-PM-messages-never-ending-cycle.patch @@ -76,7 +76,7 @@ Acked-by: Lee Duncan + */ + sdev->silence_suspend = 1; - ufshcd_crypto_setup_rq_keyslot_manager(hba, q); + ufshcd_crypto_register(hba, q); @@ -7207,7 +7218,13 @@ static u32 ufshcd_find_max_sup_active_ic diff --git a/patches.suse/scsi_probe_lun-retry-after-timeout.patch b/patches.suse/scsi_probe_lun-retry-after-timeout.patch new file mode 100644 index 0000000..30339d6 --- /dev/null +++ b/patches.suse/scsi_probe_lun-retry-after-timeout.patch @@ -0,0 +1,36 @@ +From: Martin Wilck +Date: Fri, 05 Nov 2021 15:43:45 +0600 +Subject: [PATCH] SCSI: scsi_probe_lun: retry INQUIRY after timeout +References: bsc#1189297 +Patch-mainline: Never, upstream will use a different approach that can't be backported + +Currently scsi_probe_lun() retries only for certain types of UNIT ATTENTION. +With this patch, it should also retry after a timeout (max. 3 tries +altogether). + +Signed-off-by: Martin Wilck +--- + drivers/scsi/scsi_scan.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/drivers/scsi/scsi_scan.c ++++ b/drivers/scsi/scsi_scan.c +@@ -636,6 +636,18 @@ static int scsi_probe_lun(struct scsi_de + (sshdr.ascq == 0)) + continue; + } ++ /* ++ * The retry count 3 in scsi_execute_req() above has no ++ * effect, because the mid layer doesn't retry ++ * REQ_OP_SCSI commands, relying on callers. ++ * So retry here. ++ */ ++ if (host_byte(result) == DID_TIME_OUT) { ++ SCSI_LOG_SCAN_BUS(3, sdev_printk(KERN_INFO, sdev, ++ "scsi scan: retry after timeout\n")); ++ continue; ++ } ++ + } else if (result == 0) { + /* + * if nothing was transferred, we try diff --git a/patches.suse/sd-implement-get_unique_id.patch b/patches.suse/sd-implement-get_unique_id.patch new file mode 100644 index 0000000..e4d956e --- /dev/null +++ b/patches.suse/sd-implement-get_unique_id.patch @@ -0,0 +1,79 @@ +From: Christoph Hellwig +Date: Thu, 21 Oct 2021 08:06:02 +0200 +Subject: [PATCH] sd: implement ->get_unique_id +Git-commit: b83ce214af3885437ff223b3a0c8ec6072a84167 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Add the method to query for a uniqueue ID of a given type by looking +it up in the cached device identification VPD page. 
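+
+For illustration, a block-layer consumer of this method could look
+roughly like the sketch below (hypothetical caller, not part of this
+patch; BLK_UID_NAA is designator type 3 from enum blk_unique_id):
+
+	u8 id[16];
+	int len;
+
+	/* fetch an NAA designator from the cached VPD page 0x83 */
+	len = disk->fops->get_unique_id(disk, id, BLK_UID_NAA);
+	if (len > 0)
+		pr_info("got %d-byte unique id\n", len);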
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Hannes Reinecke +Link: https://lore.kernel.org/r/20211021060607.264371-3-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/scsi/sd.c | 39 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 39 insertions(+) + +diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c +index d8f6add416c0..9b386ca39a91 100644 +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -1757,6 +1757,44 @@ static void sd_rescan(struct device *dev) + sd_revalidate_disk(sdkp->disk); + } + ++static int sd_get_unique_id(struct gendisk *disk, u8 id[16], ++ enum blk_unique_id type) ++{ ++ struct scsi_device *sdev = scsi_disk(disk)->device; ++ const struct scsi_vpd *vpd; ++ const unsigned char *d; ++ int ret = -ENXIO, len; ++ ++ rcu_read_lock(); ++ vpd = rcu_dereference(sdev->vpd_pg83); ++ if (!vpd) ++ goto out_unlock; ++ ++ ret = -EINVAL; ++ for (d = vpd->data + 4; d < vpd->data + vpd->len; d += d[3] + 4) { ++ /* we only care about designators with LU association */ ++ if (((d[1] >> 4) & 0x3) != 0x00) ++ continue; ++ if ((d[1] & 0xf) != type) ++ continue; ++ ++ /* ++ * Only exit early if a 16-byte descriptor was found. Otherwise ++ * keep looking as one with more entropy might still show up. ++ */ ++ len = d[3]; ++ if (len != 8 && len != 12 && len != 16) ++ continue; ++ ret = len; ++ memcpy(id, d + 4, len); ++ if (len == 16) ++ break; ++ } ++out_unlock: ++ rcu_read_unlock(); ++ return ret; ++} ++ + static char sd_pr_type(enum pr_type type) + { + switch (type) { +@@ -1861,6 +1899,7 @@ static const struct block_device_operations sd_fops = { + .check_events = sd_check_events, + .unlock_native_capacity = sd_unlock_native_capacity, + .report_zones = sd_zbc_report_zones, ++ .get_unique_id = sd_get_unique_id, + .pr_ops = &sd_pr_ops, + }; + +-- +2.35.3 + diff --git a/patches.suse/selfetests-bpf-Adapt-vmtest.sh-to-s390-libbpf-CI-cha.patch b/patches.suse/selfetests-bpf-Adapt-vmtest.sh-to-s390-libbpf-CI-cha.patch new file mode 100644 index 0000000..d655bad --- /dev/null +++ b/patches.suse/selfetests-bpf-Adapt-vmtest.sh-to-s390-libbpf-CI-cha.patch @@ -0,0 +1,126 @@ +From: Ilya Leoshkevich +Date: Thu, 18 Nov 2021 12:52:25 +0100 +Subject: selfetests/bpf: Adapt vmtest.sh to s390 libbpf CI changes +Patch-mainline: v5.17-rc1 +Git-commit: 29ad850a5cae84757bcd4c60e0d74232ef8c5157 +References: jsc#PED-1368 + +[1] added s390 support to libbpf CI and added an ${ARCH} prefix to a +number of paths and identifiers in libbpf GitHub repo, which vmtest.sh +relies upon. Update these and make use of the new s390 support. + +[1] https://github.com/libbpf/libbpf/pull/204 + +Co-developed-by: Andrii Nakryiko +Signed-off-by: Andrii Nakryiko +Signed-off-by: Ilya Leoshkevich +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211118115225.1349726-1-iii@linux.ibm.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/vmtest.sh | 46 ++++++++++++++++++++++------------ + 1 file changed, 31 insertions(+), 15 deletions(-) + +--- a/tools/testing/selftests/bpf/vmtest.sh ++++ b/tools/testing/selftests/bpf/vmtest.sh +@@ -4,17 +4,34 @@ + set -u + set -e + +-# This script currently only works for x86_64, as +-# it is based on the VM image used by the BPF CI which is +-# x86_64. +-QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}" +-X86_BZIMAGE="arch/x86/boot/bzImage" ++# This script currently only works for x86_64 and s390x, as ++# it is based on the VM image used by the BPF CI, which is ++# available only for these architectures. 
++ARCH="$(uname -m)" ++case "${ARCH}" in ++s390x) ++ QEMU_BINARY=qemu-system-s390x ++ QEMU_CONSOLE="ttyS1" ++ QEMU_FLAGS=(-smp 2) ++ BZIMAGE="arch/s390/boot/compressed/vmlinux" ++ ;; ++x86_64) ++ QEMU_BINARY=qemu-system-x86_64 ++ QEMU_CONSOLE="ttyS0,115200" ++ QEMU_FLAGS=(-cpu host -smp 8) ++ BZIMAGE="arch/x86/boot/bzImage" ++ ;; ++*) ++ echo "Unsupported architecture" ++ exit 1 ++ ;; ++esac + DEFAULT_COMMAND="./test_progs" + MOUNT_DIR="mnt" + ROOTFS_IMAGE="root.img" + OUTPUT_DIR="$HOME/.bpf_selftests" +-KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config" +-KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/latest.config" ++KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/config-latest.${ARCH}" ++KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/config-latest.${ARCH}" + INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX" + NUM_COMPILE_JOBS="$(nproc)" + LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")" +@@ -85,7 +102,7 @@ newest_rootfs_version() + { + { + for file in "${!URLS[@]}"; do +- if [[ $file =~ ^libbpf-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then ++ if [[ $file =~ ^"${ARCH}"/libbpf-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then + echo "${BASH_REMATCH[1]}" + fi + done +@@ -102,7 +119,7 @@ download_rootfs() + exit 1 + fi + +- download "libbpf-vmtest-rootfs-$rootfsversion.tar.zst" | ++ download "${ARCH}/libbpf-vmtest-rootfs-$rootfsversion.tar.zst" | + zstd -d | sudo tar -C "$dir" -x + } + +@@ -224,13 +241,12 @@ EOF + -nodefaults \ + -display none \ + -serial mon:stdio \ +- -cpu host \ ++ "${qemu_flags[@]}" \ + -enable-kvm \ +- -smp 8 \ + -m 4G \ + -drive file="${rootfs_img}",format=raw,index=1,media=disk,if=virtio,cache=none \ + -kernel "${kernel_bzimage}" \ +- -append "root=/dev/vda rw console=ttyS0,115200" ++ -append "root=/dev/vda rw console=${QEMU_CONSOLE}" + } + + copy_logs() +@@ -282,7 +298,7 @@ main() + local kernel_checkout=$(realpath "${script_dir}"/../../../../) + # By default the script searches for the kernel in the checkout directory but + # it also obeys environment variables O= and KBUILD_OUTPUT= +- local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}" ++ local kernel_bzimage="${kernel_checkout}/${BZIMAGE}" + local command="${DEFAULT_COMMAND}" + local update_image="no" + local exit_command="poweroff -f" +@@ -337,13 +353,13 @@ main() + if is_rel_path "${O}"; then + O="$(realpath "${PWD}/${O}")" + fi +- kernel_bzimage="${O}/${X86_BZIMAGE}" ++ kernel_bzimage="${O}/${BZIMAGE}" + make_command="${make_command} O=${O}" + elif [[ "${KBUILD_OUTPUT:=""}" != "" ]]; then + if is_rel_path "${KBUILD_OUTPUT}"; then + KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")" + fi +- kernel_bzimage="${KBUILD_OUTPUT}/${X86_BZIMAGE}" ++ kernel_bzimage="${KBUILD_OUTPUT}/${BZIMAGE}" + make_command="${make_command} KBUILD_OUTPUT=${KBUILD_OUTPUT}" + fi + diff --git a/patches.suse/selftest-bpf-benchs-Add-bpf_loop-benchmark.patch b/patches.suse/selftest-bpf-benchs-Add-bpf_loop-benchmark.patch new file mode 100644 index 0000000..77a6b0b --- /dev/null +++ b/patches.suse/selftest-bpf-benchs-Add-bpf_loop-benchmark.patch @@ -0,0 +1,356 @@ +From: Joanne Koong +Date: Mon, 29 Nov 2021 19:06:22 -0800 +Subject: selftest/bpf/benchs: Add bpf_loop benchmark +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 
+Git-commit: ec151037af4f56065d5b258af82f13dbbf279ebd +References: jsc#PED-1368 + +Add benchmark to measure the throughput and latency of the bpf_loop +call. + +Testing this on my dev machine on 1 thread, the data is as follows: + + nr_loops: 10 +bpf_loop - throughput: 198.519 ± 0.155 M ops/s, latency: 5.037 ns/op + + nr_loops: 100 +bpf_loop - throughput: 247.448 ± 0.305 M ops/s, latency: 4.041 ns/op + + nr_loops: 500 +bpf_loop - throughput: 260.839 ± 0.380 M ops/s, latency: 3.834 ns/op + + nr_loops: 1000 +bpf_loop - throughput: 262.806 ± 0.629 M ops/s, latency: 3.805 ns/op + + nr_loops: 5000 +bpf_loop - throughput: 264.211 ± 1.508 M ops/s, latency: 3.785 ns/op + + nr_loops: 10000 +bpf_loop - throughput: 265.366 ± 3.054 M ops/s, latency: 3.768 ns/op + + nr_loops: 50000 +bpf_loop - throughput: 235.986 ± 20.205 M ops/s, latency: 4.238 ns/op + + nr_loops: 100000 +bpf_loop - throughput: 264.482 ± 0.279 M ops/s, latency: 3.781 ns/op + + nr_loops: 500000 +bpf_loop - throughput: 309.773 ± 87.713 M ops/s, latency: 3.228 ns/op + + nr_loops: 1000000 +bpf_loop - throughput: 262.818 ± 4.143 M ops/s, latency: 3.805 ns/op + +>From this data, we can see that the latency per loop decreases as the +number of loops increases. On this particular machine, each loop had an +overhead of about ~4 ns, and we were able to run ~250 million loops +per second. + +Signed-off-by: Joanne Koong +Signed-off-by: Alexei Starovoitov +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211130030622.4131246-5-joannekoong@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/Makefile | 4 + tools/testing/selftests/bpf/bench.c | 37 +++++ + tools/testing/selftests/bpf/bench.h | 2 + tools/testing/selftests/bpf/benchs/bench_bpf_loop.c | 105 +++++++++++++++ + tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh | 15 ++ + tools/testing/selftests/bpf/benchs/run_common.sh | 15 ++ + tools/testing/selftests/bpf/progs/bpf_loop_bench.c | 26 +++ + 7 files changed, 203 insertions(+), 1 deletion(-) + create mode 100644 tools/testing/selftests/bpf/benchs/bench_bpf_loop.c + create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh + create mode 100644 tools/testing/selftests/bpf/progs/bpf_loop_bench.c + +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -531,6 +531,7 @@ $(OUTPUT)/bench_trigger.o: $(OUTPUT)/tri + $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ + $(OUTPUT)/perfbuf_bench.skel.h + $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h ++$(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h + $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) + $(OUTPUT)/bench: LDLIBS += -lm + $(OUTPUT)/bench: $(OUTPUT)/bench.o \ +@@ -540,7 +541,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ + $(OUTPUT)/bench_rename.o \ + $(OUTPUT)/bench_trigger.o \ + $(OUTPUT)/bench_ringbufs.o \ +- $(OUTPUT)/bench_bloom_filter_map.o ++ $(OUTPUT)/bench_bloom_filter_map.o \ ++ $(OUTPUT)/bench_bpf_loop.o + $(call msg,BINARY,,$@) + $(Q)$(CC) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ + +--- a/tools/testing/selftests/bpf/bench.c ++++ b/tools/testing/selftests/bpf/bench.c +@@ -134,6 +134,39 @@ void hits_drops_report_final(struct benc + total_ops_mean, total_ops_stddev); + } + ++void ops_report_progress(int iter, struct bench_res *res, long delta_ns) ++{ ++ double hits_per_sec, hits_per_prod; ++ ++ hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0); ++ hits_per_prod = hits_per_sec / env.producer_cnt; ++ ++ printf("Iter %3d (%7.3lfus): 
", iter, (delta_ns - 1000000000) / 1000.0); ++ ++ printf("hits %8.3lfM/s (%7.3lfM/prod)\n", hits_per_sec, hits_per_prod); ++} ++ ++void ops_report_final(struct bench_res res[], int res_cnt) ++{ ++ double hits_mean = 0.0, hits_stddev = 0.0; ++ int i; ++ ++ for (i = 0; i < res_cnt; i++) ++ hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); ++ ++ if (res_cnt > 1) { ++ for (i = 0; i < res_cnt; i++) ++ hits_stddev += (hits_mean - res[i].hits / 1000000.0) * ++ (hits_mean - res[i].hits / 1000000.0) / ++ (res_cnt - 1.0); ++ ++ hits_stddev = sqrt(hits_stddev); ++ } ++ printf("Summary: throughput %8.3lf \u00B1 %5.3lf M ops/s (%7.3lfM ops/prod), ", ++ hits_mean, hits_stddev, hits_mean / env.producer_cnt); ++ printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt); ++} ++ + const char *argp_program_version = "benchmark"; + const char *argp_program_bug_address = ""; + const char argp_program_doc[] = +@@ -171,10 +204,12 @@ static const struct argp_option opts[] = + + extern struct argp bench_ringbufs_argp; + extern struct argp bench_bloom_map_argp; ++extern struct argp bench_bpf_loop_argp; + + static const struct argp_child bench_parsers[] = { + { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, + { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, ++ { &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 }, + {}, + }; + +@@ -373,6 +408,7 @@ extern const struct bench bench_bloom_up + extern const struct bench bench_bloom_false_positive; + extern const struct bench bench_hashmap_without_bloom; + extern const struct bench bench_hashmap_with_bloom; ++extern const struct bench bench_bpf_loop; + + static const struct bench *benchs[] = { + &bench_count_global, +@@ -404,6 +440,7 @@ static const struct bench *benchs[] = { + &bench_bloom_false_positive, + &bench_hashmap_without_bloom, + &bench_hashmap_with_bloom, ++ &bench_bpf_loop, + }; + + static void setup_benchmark() +--- a/tools/testing/selftests/bpf/bench.h ++++ b/tools/testing/selftests/bpf/bench.h +@@ -59,6 +59,8 @@ void hits_drops_report_progress(int iter + void hits_drops_report_final(struct bench_res res[], int res_cnt); + void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); + void false_hits_report_final(struct bench_res res[], int res_cnt); ++void ops_report_progress(int iter, struct bench_res *res, long delta_ns); ++void ops_report_final(struct bench_res res[], int res_cnt); + + static inline __u64 get_time_ns() { + struct timespec t; +--- /dev/null ++++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c +@@ -0,0 +1,105 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Facebook */ ++ ++#include ++#include "bench.h" ++#include "bpf_loop_bench.skel.h" ++ ++/* BPF triggering benchmarks */ ++static struct ctx { ++ struct bpf_loop_bench *skel; ++} ctx; ++ ++static struct { ++ __u32 nr_loops; ++} args = { ++ .nr_loops = 10, ++}; ++ ++enum { ++ ARG_NR_LOOPS = 4000, ++}; ++ ++static const struct argp_option opts[] = { ++ { "nr_loops", ARG_NR_LOOPS, "nr_loops", 0, ++ "Set number of loops for the bpf_loop helper"}, ++ {}, ++}; ++ ++static error_t parse_arg(int key, char *arg, struct argp_state *state) ++{ ++ switch (key) { ++ case ARG_NR_LOOPS: ++ args.nr_loops = strtol(arg, NULL, 10); ++ break; ++ default: ++ return ARGP_ERR_UNKNOWN; ++ } ++ ++ return 0; ++} ++ ++/* exported into benchmark runner */ ++const struct argp bench_bpf_loop_argp = { ++ .options = opts, ++ .parser = parse_arg, ++}; ++ ++static void validate(void) ++{ ++ if (env.consumer_cnt != 1) { ++ 
fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); ++ exit(1); ++ } ++} ++ ++static void *producer(void *input) ++{ ++ while (true) ++ /* trigger the bpf program */ ++ syscall(__NR_getpgid); ++ ++ return NULL; ++} ++ ++static void *consumer(void *input) ++{ ++ return NULL; ++} ++ ++static void measure(struct bench_res *res) ++{ ++ res->hits = atomic_swap(&ctx.skel->bss->hits, 0); ++} ++ ++static void setup(void) ++{ ++ struct bpf_link *link; ++ ++ setup_libbpf(); ++ ++ ctx.skel = bpf_loop_bench__open_and_load(); ++ if (!ctx.skel) { ++ fprintf(stderr, "failed to open skeleton\n"); ++ exit(1); ++ } ++ ++ link = bpf_program__attach(ctx.skel->progs.benchmark); ++ if (!link) { ++ fprintf(stderr, "failed to attach program!\n"); ++ exit(1); ++ } ++ ++ ctx.skel->bss->nr_loops = args.nr_loops; ++} ++ ++const struct bench bench_bpf_loop = { ++ .name = "bpf-loop", ++ .validate = validate, ++ .setup = setup, ++ .producer_thread = producer, ++ .consumer_thread = consumer, ++ .measure = measure, ++ .report_progress = ops_report_progress, ++ .report_final = ops_report_final, ++}; +--- /dev/null ++++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh +@@ -0,0 +1,15 @@ ++#!/bin/bash ++# SPDX-License-Identifier: GPL-2.0 ++ ++source ./benchs/run_common.sh ++ ++set -eufo pipefail ++ ++for t in 1 4 8 12 16; do ++for i in 10 100 500 1000 5000 10000 50000 100000 500000 1000000; do ++subtitle "nr_loops: $i, nr_threads: $t" ++ summarize_ops "bpf_loop: " \ ++ "$($RUN_BENCH -p $t --nr_loops $i bpf-loop)" ++ printf "\n" ++done ++done +--- a/tools/testing/selftests/bpf/benchs/run_common.sh ++++ b/tools/testing/selftests/bpf/benchs/run_common.sh +@@ -33,6 +33,14 @@ function percentage() + echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/" + } + ++function ops() ++{ ++ echo -n "throughput: " ++ echo -n "$*" | sed -E "s/.*throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" ++ echo -n -e ", latency: " ++ echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/" ++} ++ + function total() + { + echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +@@ -52,6 +60,13 @@ function summarize_percentage() + printf "%-20s %s%%\n" "$bench" "$(percentage $summary)" + } + ++function summarize_ops() ++{ ++ bench="$1" ++ summary=$(echo $2 | tail -n1) ++ printf "%-20s %s\n" "$bench" "$(ops $summary)" ++} ++ + function summarize_total() + { + bench="$1" +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c +@@ -0,0 +1,26 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Facebook */ ++ ++#include "vmlinux.h" ++#include ++ ++char _license[] SEC("license") = "GPL"; ++ ++u32 nr_loops; ++long hits; ++ ++static int empty_callback(__u32 index, void *data) ++{ ++ return 0; ++} ++ ++SEC("fentry/__x64_sys_getpgid") ++int benchmark(void *ctx) ++{ ++ for (int i = 0; i < 1000; i++) { ++ bpf_loop(nr_loops, empty_callback, NULL, 0); ++ ++ __sync_add_and_fetch(&hits, nr_loops); ++ } ++ return 0; ++} diff --git a/patches.suse/selftests-bpf-Add-BTF_KIND_TYPE_TAG-unit-tests.patch b/patches.suse/selftests-bpf-Add-BTF_KIND_TYPE_TAG-unit-tests.patch new file mode 100644 index 0000000..803ef1c --- /dev/null +++ b/patches.suse/selftests-bpf-Add-BTF_KIND_TYPE_TAG-unit-tests.patch @@ -0,0 +1,63 @@ +From: Yonghong Song +Date: Thu, 11 Nov 2021 17:26:30 -0800 +Subject: selftests/bpf: Add BTF_KIND_TYPE_TAG unit tests +Patch-mainline: v5.17-rc1 +Git-commit: 6aa5dabc9d0ef722905e4ca4f9751d70cf3ec8a4 +References: jsc#PED-1368 + +Add BTF_KIND_TYPE_TAG 
unit tests. + +Signed-off-by: Yonghong Song +Signed-off-by: Alexei Starovoitov +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211112012630.1506095-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/btf.c | 18 ++++++++++++++++++ + tools/testing/selftests/bpf/test_btf.h | 3 +++ + 2 files changed, 21 insertions(+) + +--- a/tools/testing/selftests/bpf/prog_tests/btf.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf.c +@@ -3939,6 +3939,23 @@ static struct btf_raw_test raw_tests[] = + .btf_load_err = true, + .err_str = "Invalid component_idx", + }, ++{ ++ .descr = "type_tag test #1", ++ .raw_types = { ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [2] */ ++ BTF_PTR_ENC(2), /* [3] */ ++ BTF_END_RAW, ++ }, ++ BTF_STR_SEC("\0tag"), ++ .map_type = BPF_MAP_TYPE_ARRAY, ++ .map_name = "tag_type_check_btf", ++ .key_size = sizeof(int), ++ .value_size = 4, ++ .key_type_id = 1, ++ .value_type_id = 1, ++ .max_entries = 1, ++}, + + }; /* struct btf_raw_test raw_tests[] */ + +@@ -7222,6 +7239,7 @@ static int btf_type_size(const struct bt + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_FLOAT: ++ case BTF_KIND_TYPE_TAG: + return base_size; + case BTF_KIND_INT: + return base_size + sizeof(__u32); +--- a/tools/testing/selftests/bpf/test_btf.h ++++ b/tools/testing/selftests/bpf/test_btf.h +@@ -72,4 +72,7 @@ + #define BTF_DECL_TAG_ENC(value, type, component_idx) \ + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) + ++#define BTF_TYPE_TAG_ENC(value, type) \ ++ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type) ++ + #endif /* _TEST_BTF_H */ diff --git a/patches.suse/selftests-bpf-Add-CO-RE-relocations-to-verifier-scal.patch b/patches.suse/selftests-bpf-Add-CO-RE-relocations-to-verifier-scal.patch new file mode 100644 index 0000000..efa20c9 --- /dev/null +++ b/patches.suse/selftests-bpf-Add-CO-RE-relocations-to-verifier-scal.patch @@ -0,0 +1,33 @@ +From: Alexei Starovoitov +Date: Wed, 1 Dec 2021 10:10:40 -0800 +Subject: selftests/bpf: Add CO-RE relocations to verifier scale test. +Patch-mainline: v5.17-rc1 +Git-commit: 098dc5335a2083223c80d058ab4d23f6ce120b97 +References: jsc#PED-1368 + +Add 182 CO-RE relocations to verifier scale test. 
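+
+(For context: the vmlinux.h types carry Clang's preserve_access_index
+attribute, so plain field accesses such as the first lines of
+balancer_ingress() below,
+
+	void *data_end = (void *)(long)ctx->data_end;
+
+are recorded as CO-RE relocations instead of fixed offsets; presumably
+that is where the 182 relocations come from.)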
+ +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-18-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/progs/test_verif_scale2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c ++++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c +@@ -1,11 +1,11 @@ + // SPDX-License-Identifier: GPL-2.0 + // Copyright (c) 2019 Facebook +-#include ++#include "vmlinux.h" + #include + #define ATTR __always_inline + #include "test_jhash.h" + +-SEC("scale90_inline") ++SEC("tc") + int balancer_ingress(struct __sk_buff *ctx) + { + void *data_end = (void *)(long)ctx->data_end; diff --git a/patches.suse/selftests-bpf-Add-a-C-test-for-btf_type_tag.patch b/patches.suse/selftests-bpf-Add-a-C-test-for-btf_type_tag.patch new file mode 100644 index 0000000..28a6251 --- /dev/null +++ b/patches.suse/selftests-bpf-Add-a-C-test-for-btf_type_tag.patch @@ -0,0 +1,112 @@ +From: Yonghong Song +Date: Thu, 11 Nov 2021 17:26:46 -0800 +Subject: selftests/bpf: Add a C test for btf_type_tag +Patch-mainline: v5.17-rc1 +Git-commit: 5698a42a73a1d9cb7efd31ca1bf35daa87f5e1a9 +References: jsc#PED-1368 + +The following is the main btf_type_tag usage in the +C test: + #define __tag1 __attribute__((btf_type_tag("tag1"))) + #define __tag2 __attribute__((btf_type_tag("tag2"))) + struct btf_type_tag_test { + int __tag1 * __tag1 __tag2 *p; + } g; + +The bpftool raw dump with related types: + [4] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED + [11] STRUCT 'btf_type_tag_test' size=8 vlen=1 + 'p' type_id=14 bits_offset=0 + [12] TYPE_TAG 'tag1' type_id=16 + [13] TYPE_TAG 'tag2' type_id=12 + [14] PTR '(anon)' type_id=13 + [15] TYPE_TAG 'tag1' type_id=4 + [16] PTR '(anon)' type_id=15 + [17] VAR 'g' type_id=11, linkage=global + +With format C dump, we have + struct btf_type_tag_test { + int __attribute__((btf_type_tag("tag1"))) * __attribute__((btf_type_tag("tag1"))) __attribute__((btf_type_tag("tag2"))) *p; + }; +The result C code is identical to the original definition except macro's are gone. 
+ +Signed-off-by: Yonghong Song +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211112012646.1508231-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/btf_tag.c | 24 ++++++++++++++++++++++ + tools/testing/selftests/bpf/progs/btf_type_tag.c | 25 +++++++++++++++++++++++ + 2 files changed, 49 insertions(+) + create mode 100644 tools/testing/selftests/bpf/progs/btf_type_tag.c + +--- a/tools/testing/selftests/bpf/prog_tests/btf_tag.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c +@@ -3,6 +3,12 @@ + #include + #include "btf_decl_tag.skel.h" + ++/* struct btf_type_tag_test is referenced in btf_type_tag.skel.h */ ++struct btf_type_tag_test { ++ int **p; ++}; ++#include "btf_type_tag.skel.h" ++ + static void test_btf_decl_tag(void) + { + struct btf_decl_tag *skel; +@@ -19,8 +25,26 @@ static void test_btf_decl_tag(void) + btf_decl_tag__destroy(skel); + } + ++static void test_btf_type_tag(void) ++{ ++ struct btf_type_tag *skel; ++ ++ skel = btf_type_tag__open_and_load(); ++ if (!ASSERT_OK_PTR(skel, "btf_type_tag")) ++ return; ++ ++ if (skel->rodata->skip_tests) { ++ printf("%s:SKIP: btf_type_tag attribute not supported", __func__); ++ test__skip(); ++ } ++ ++ btf_type_tag__destroy(skel); ++} ++ + void test_btf_tag(void) + { + if (test__start_subtest("btf_decl_tag")) + test_btf_decl_tag(); ++ if (test__start_subtest("btf_type_tag")) ++ test_btf_type_tag(); + } +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/btf_type_tag.c +@@ -0,0 +1,25 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Facebook */ ++#include "vmlinux.h" ++#include ++#include ++ ++#if __has_attribute(btf_type_tag) ++#define __tag1 __attribute__((btf_type_tag("tag1"))) ++#define __tag2 __attribute__((btf_type_tag("tag2"))) ++volatile const bool skip_tests = false; ++#else ++#define __tag1 ++#define __tag2 ++volatile const bool skip_tests = true; ++#endif ++ ++struct btf_type_tag_test { ++ int __tag1 * __tag1 __tag2 *p; ++} g; ++ ++SEC("fentry/bpf_fentry_test1") ++int BPF_PROG(sub, int x) ++{ ++ return 0; ++} diff --git a/patches.suse/selftests-bpf-Add-a-dedup-selftest-with-equivalent-s.patch b/patches.suse/selftests-bpf-Add-a-dedup-selftest-with-equivalent-s.patch new file mode 100644 index 0000000..7b38b66 --- /dev/null +++ b/patches.suse/selftests-bpf-Add-a-dedup-selftest-with-equivalent-s.patch @@ -0,0 +1,58 @@ +From: Yonghong Song +Date: Mon, 15 Nov 2021 08:39:43 -0800 +Subject: selftests/bpf: Add a dedup selftest with equivalent structure types +Patch-mainline: v5.17-rc1 +Git-commit: 4746158305e98c91c479539d53ef9bf8c520dd66 +References: jsc#PED-1368 + +Without previous libbpf patch, the following error will occur: + + $ ./test_progs -t btf + ... + do_test_dedup:FAIL:check btf_dedup failed errno:-22#13/205 btf/dedup: btf_type_tag #5, struct:FAIL + +And the previous libbpf patch fixed the issue. 
+ +Signed-off-by: Yonghong Song +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211115163943.3922547-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/btf.c | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) + +--- a/tools/testing/selftests/bpf/prog_tests/btf.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf.c +@@ -7352,6 +7352,32 @@ static struct btf_dedup_test dedup_tests + BTF_STR_SEC("\0tag1"), + }, + }, ++{ ++ .descr = "dedup: btf_type_tag #5, struct", ++ .input = { ++ .raw_types = { ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ ++ BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [3] */ ++ BTF_MEMBER_ENC(NAME_NTH(3), 2, BTF_MEMBER_OFFSET(0, 0)), ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [4] */ ++ BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [5] */ ++ BTF_MEMBER_ENC(NAME_NTH(3), 4, BTF_MEMBER_OFFSET(0, 0)), ++ BTF_END_RAW, ++ }, ++ BTF_STR_SEC("\0tag1\0t\0m"), ++ }, ++ .expect = { ++ .raw_types = { ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ ++ BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [3] */ ++ BTF_MEMBER_ENC(NAME_NTH(3), 2, BTF_MEMBER_OFFSET(0, 0)), ++ BTF_END_RAW, ++ }, ++ BTF_STR_SEC("\0tag1\0t\0m"), ++ }, ++}, + + }; + diff --git a/patches.suse/selftests-bpf-Add-benchmark-for-bpf_strncmp-helper.patch b/patches.suse/selftests-bpf-Add-benchmark-for-bpf_strncmp-helper.patch new file mode 100644 index 0000000..2c9c7b1 --- /dev/null +++ b/patches.suse/selftests-bpf-Add-benchmark-for-bpf_strncmp-helper.patch @@ -0,0 +1,375 @@ +From: Hou Tao +Date: Fri, 10 Dec 2021 22:16:51 +0800 +Subject: selftests/bpf: Add benchmark for bpf_strncmp() helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 +Git-commit: 9c42652f8be3202ad11cf4fbc358688003cff21c +References: jsc#PED-1368 + +Add benchmark to compare the performance between home-made strncmp() +in bpf program and bpf_strncmp() helper. In summary, the performance +win of bpf_strncmp() under x86-64 is greater than 18% when the compared +string length is greater than 64, and is 179% when the length is 4095. +Under arm64 the performance win is even bigger: 33% when the length +is greater than 64 and 600% when the length is 4095. 
+ +The following is the details: + +no-helper-X: use home-made strncmp() to compare X-sized string +helper-Y: use bpf_strncmp() to compare Y-sized string + +Under x86-64: + +no-helper-1 3.504 ± 0.000M/s (drops 0.000 ± 0.000M/s) +helper-1 3.347 ± 0.001M/s (drops 0.000 ± 0.000M/s) + +no-helper-8 3.357 ± 0.001M/s (drops 0.000 ± 0.000M/s) +helper-8 3.307 ± 0.001M/s (drops 0.000 ± 0.000M/s) + +no-helper-32 3.064 ± 0.000M/s (drops 0.000 ± 0.000M/s) +helper-32 3.253 ± 0.001M/s (drops 0.000 ± 0.000M/s) + +no-helper-64 2.563 ± 0.001M/s (drops 0.000 ± 0.000M/s) +helper-64 3.040 ± 0.001M/s (drops 0.000 ± 0.000M/s) + +no-helper-128 1.975 ± 0.000M/s (drops 0.000 ± 0.000M/s) +helper-128 2.641 ± 0.000M/s (drops 0.000 ± 0.000M/s) + +no-helper-512 0.759 ± 0.000M/s (drops 0.000 ± 0.000M/s) +helper-512 1.574 ± 0.000M/s (drops 0.000 ± 0.000M/s) + +no-helper-2048 0.329 ± 0.000M/s (drops 0.000 ± 0.000M/s) +helper-2048 0.602 ± 0.000M/s (drops 0.000 ± 0.000M/s) + +no-helper-4095 0.117 ± 0.000M/s (drops 0.000 ± 0.000M/s) +helper-4095 0.327 ± 0.000M/s (drops 0.000 ± 0.000M/s) + +Under arm64: + +no-helper-1 2.806 ± 0.004M/s (drops 0.000 ± 0.000M/s) +helper-1 2.819 ± 0.002M/s (drops 0.000 ± 0.000M/s) + +no-helper-8 2.797 ± 0.109M/s (drops 0.000 ± 0.000M/s) +helper-8 2.786 ± 0.025M/s (drops 0.000 ± 0.000M/s) + +no-helper-32 2.399 ± 0.011M/s (drops 0.000 ± 0.000M/s) +helper-32 2.703 ± 0.002M/s (drops 0.000 ± 0.000M/s) + +no-helper-64 2.020 ± 0.015M/s (drops 0.000 ± 0.000M/s) +helper-64 2.702 ± 0.073M/s (drops 0.000 ± 0.000M/s) + +no-helper-128 1.604 ± 0.001M/s (drops 0.000 ± 0.000M/s) +helper-128 2.516 ± 0.002M/s (drops 0.000 ± 0.000M/s) + +no-helper-512 0.699 ± 0.000M/s (drops 0.000 ± 0.000M/s) +helper-512 2.106 ± 0.003M/s (drops 0.000 ± 0.000M/s) + +no-helper-2048 0.215 ± 0.000M/s (drops 0.000 ± 0.000M/s) +helper-2048 1.223 ± 0.003M/s (drops 0.000 ± 0.000M/s) + +no-helper-4095 0.112 ± 0.000M/s (drops 0.000 ± 0.000M/s) +helper-4095 0.796 ± 0.000M/s (drops 0.000 ± 0.000M/s) + +Signed-off-by: Hou Tao +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211210141652.877186-4-houtao1@huawei.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/Makefile | 4 + tools/testing/selftests/bpf/bench.c | 6 + tools/testing/selftests/bpf/benchs/bench_strncmp.c | 161 ++++++++++++++++ + tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh | 12 + + tools/testing/selftests/bpf/progs/strncmp_bench.c | 50 ++++ + 5 files changed, 232 insertions(+), 1 deletion(-) + create mode 100644 tools/testing/selftests/bpf/benchs/bench_strncmp.c + create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh + create mode 100644 tools/testing/selftests/bpf/progs/strncmp_bench.c + +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -537,6 +537,7 @@ $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ri + $(OUTPUT)/perfbuf_bench.skel.h + $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h + $(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h ++$(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h + $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) + $(OUTPUT)/bench: LDLIBS += -lm + $(OUTPUT)/bench: $(OUTPUT)/bench.o \ +@@ -547,7 +548,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ + $(OUTPUT)/bench_trigger.o \ + $(OUTPUT)/bench_ringbufs.o \ + $(OUTPUT)/bench_bloom_filter_map.o \ +- $(OUTPUT)/bench_bpf_loop.o ++ $(OUTPUT)/bench_bpf_loop.o \ ++ $(OUTPUT)/bench_strncmp.o + $(call msg,BINARY,,$@) + $(Q)$(CC) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ + 
+--- a/tools/testing/selftests/bpf/bench.c ++++ b/tools/testing/selftests/bpf/bench.c +@@ -205,11 +205,13 @@ static const struct argp_option opts[] = + extern struct argp bench_ringbufs_argp; + extern struct argp bench_bloom_map_argp; + extern struct argp bench_bpf_loop_argp; ++extern struct argp bench_strncmp_argp; + + static const struct argp_child bench_parsers[] = { + { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, + { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, + { &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 }, ++ { &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 }, + {}, + }; + +@@ -409,6 +411,8 @@ extern const struct bench bench_bloom_fa + extern const struct bench bench_hashmap_without_bloom; + extern const struct bench bench_hashmap_with_bloom; + extern const struct bench bench_bpf_loop; ++extern const struct bench bench_strncmp_no_helper; ++extern const struct bench bench_strncmp_helper; + + static const struct bench *benchs[] = { + &bench_count_global, +@@ -441,6 +445,8 @@ static const struct bench *benchs[] = { + &bench_hashmap_without_bloom, + &bench_hashmap_with_bloom, + &bench_bpf_loop, ++ &bench_strncmp_no_helper, ++ &bench_strncmp_helper, + }; + + static void setup_benchmark() +--- /dev/null ++++ b/tools/testing/selftests/bpf/benchs/bench_strncmp.c +@@ -0,0 +1,161 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (C) 2021. Huawei Technologies Co., Ltd */ ++#include ++#include "bench.h" ++#include "strncmp_bench.skel.h" ++ ++static struct strncmp_ctx { ++ struct strncmp_bench *skel; ++} ctx; ++ ++static struct strncmp_args { ++ u32 cmp_str_len; ++} args = { ++ .cmp_str_len = 32, ++}; ++ ++enum { ++ ARG_CMP_STR_LEN = 5000, ++}; ++ ++static const struct argp_option opts[] = { ++ { "cmp-str-len", ARG_CMP_STR_LEN, "CMP_STR_LEN", 0, ++ "Set the length of compared string" }, ++ {}, ++}; ++ ++static error_t strncmp_parse_arg(int key, char *arg, struct argp_state *state) ++{ ++ switch (key) { ++ case ARG_CMP_STR_LEN: ++ args.cmp_str_len = strtoul(arg, NULL, 10); ++ if (!args.cmp_str_len || ++ args.cmp_str_len >= sizeof(ctx.skel->bss->str)) { ++ fprintf(stderr, "Invalid cmp str len (limit %zu)\n", ++ sizeof(ctx.skel->bss->str)); ++ argp_usage(state); ++ } ++ break; ++ default: ++ return ARGP_ERR_UNKNOWN; ++ } ++ ++ return 0; ++} ++ ++const struct argp bench_strncmp_argp = { ++ .options = opts, ++ .parser = strncmp_parse_arg, ++}; ++ ++static void strncmp_validate(void) ++{ ++ if (env.consumer_cnt != 1) { ++ fprintf(stderr, "strncmp benchmark doesn't support multi-consumer!\n"); ++ exit(1); ++ } ++} ++ ++static void strncmp_setup(void) ++{ ++ int err; ++ char *target; ++ size_t i, sz; ++ ++ sz = sizeof(ctx.skel->rodata->target); ++ if (!sz || sz < sizeof(ctx.skel->bss->str)) { ++ fprintf(stderr, "invalid string size (target %zu, src %zu)\n", ++ sz, sizeof(ctx.skel->bss->str)); ++ exit(1); ++ } ++ ++ setup_libbpf(); ++ ++ ctx.skel = strncmp_bench__open(); ++ if (!ctx.skel) { ++ fprintf(stderr, "failed to open skeleton\n"); ++ exit(1); ++ } ++ ++ srandom(time(NULL)); ++ target = ctx.skel->rodata->target; ++ for (i = 0; i < sz - 1; i++) ++ target[i] = '1' + random() % 9; ++ target[sz - 1] = '\0'; ++ ++ ctx.skel->rodata->cmp_str_len = args.cmp_str_len; ++ ++ memcpy(ctx.skel->bss->str, target, args.cmp_str_len); ++ ctx.skel->bss->str[args.cmp_str_len] = '\0'; ++ /* Make bss->str < rodata->target */ ++ ctx.skel->bss->str[args.cmp_str_len - 1] -= 1; ++ ++ err = strncmp_bench__load(ctx.skel); ++ if (err) { ++ fprintf(stderr, "failed to 
load skeleton\n"); ++ strncmp_bench__destroy(ctx.skel); ++ exit(1); ++ } ++} ++ ++static void strncmp_attach_prog(struct bpf_program *prog) ++{ ++ struct bpf_link *link; ++ ++ link = bpf_program__attach(prog); ++ if (!link) { ++ fprintf(stderr, "failed to attach program!\n"); ++ exit(1); ++ } ++} ++ ++static void strncmp_no_helper_setup(void) ++{ ++ strncmp_setup(); ++ strncmp_attach_prog(ctx.skel->progs.strncmp_no_helper); ++} ++ ++static void strncmp_helper_setup(void) ++{ ++ strncmp_setup(); ++ strncmp_attach_prog(ctx.skel->progs.strncmp_helper); ++} ++ ++static void *strncmp_producer(void *ctx) ++{ ++ while (true) ++ (void)syscall(__NR_getpgid); ++ return NULL; ++} ++ ++static void *strncmp_consumer(void *ctx) ++{ ++ return NULL; ++} ++ ++static void strncmp_measure(struct bench_res *res) ++{ ++ res->hits = atomic_swap(&ctx.skel->bss->hits, 0); ++} ++ ++const struct bench bench_strncmp_no_helper = { ++ .name = "strncmp-no-helper", ++ .validate = strncmp_validate, ++ .setup = strncmp_no_helper_setup, ++ .producer_thread = strncmp_producer, ++ .consumer_thread = strncmp_consumer, ++ .measure = strncmp_measure, ++ .report_progress = hits_drops_report_progress, ++ .report_final = hits_drops_report_final, ++}; ++ ++const struct bench bench_strncmp_helper = { ++ .name = "strncmp-helper", ++ .validate = strncmp_validate, ++ .setup = strncmp_helper_setup, ++ .producer_thread = strncmp_producer, ++ .consumer_thread = strncmp_consumer, ++ .measure = strncmp_measure, ++ .report_progress = hits_drops_report_progress, ++ .report_final = hits_drops_report_final, ++}; +--- /dev/null ++++ b/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh +@@ -0,0 +1,12 @@ ++#!/bin/bash ++# SPDX-License-Identifier: GPL-2.0 ++ ++source ./benchs/run_common.sh ++ ++set -eufo pipefail ++ ++for s in 1 8 64 512 2048 4095; do ++ for b in no-helper helper; do ++ summarize ${b}-${s} "$($RUN_BENCH --cmp-str-len=$s strncmp-${b})" ++ done ++done +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c +@@ -0,0 +1,50 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (C) 2021. Huawei Technologies Co., Ltd */ ++#include ++#include ++#include ++#include ++ ++#define STRNCMP_STR_SZ 4096 ++ ++/* Will be updated by benchmark before program loading */ ++const volatile unsigned int cmp_str_len = 1; ++const char target[STRNCMP_STR_SZ]; ++ ++long hits = 0; ++char str[STRNCMP_STR_SZ]; ++ ++char _license[] SEC("license") = "GPL"; ++ ++static __always_inline int local_strncmp(const char *s1, unsigned int sz, ++ const char *s2) ++{ ++ int ret = 0; ++ unsigned int i; ++ ++ for (i = 0; i < sz; i++) { ++ /* E.g. 
0xff > 0x31 */ ++ ret = (unsigned char)s1[i] - (unsigned char)s2[i]; ++ if (ret || !s1[i]) ++ break; ++ } ++ ++ return ret; ++} ++ ++SEC("tp/syscalls/sys_enter_getpgid") ++int strncmp_no_helper(void *ctx) ++{ ++ if (local_strncmp(str, cmp_str_len + 1, target) < 0) ++ __sync_add_and_fetch(&hits, 1); ++ return 0; ++} ++ ++SEC("tp/syscalls/sys_enter_getpgid") ++int strncmp_helper(void *ctx) ++{ ++ if (bpf_strncmp(str, cmp_str_len + 1, target) < 0) ++ __sync_add_and_fetch(&hits, 1); ++ return 0; ++} ++ diff --git a/patches.suse/selftests-bpf-Add-bpf_loop-test.patch b/patches.suse/selftests-bpf-Add-bpf_loop-test.patch new file mode 100644 index 0000000..d19d979 --- /dev/null +++ b/patches.suse/selftests-bpf-Add-bpf_loop-test.patch @@ -0,0 +1,285 @@ +From: Joanne Koong +Date: Mon, 29 Nov 2021 19:06:20 -0800 +Subject: selftests/bpf: Add bpf_loop test +Patch-mainline: v5.17-rc1 +Git-commit: 4e5070b64b375a9c1f570893cfceeba108382bef +References: jsc#PED-1368 + +Add test for bpf_loop testing a variety of cases: +various nr_loops, null callback ctx, invalid flags, nested callbacks. + +Signed-off-by: Joanne Koong +Signed-off-by: Alexei Starovoitov +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211130030622.4131246-3-joannekoong@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/bpf_loop.c | 145 ++++++++++++++++++++++ + tools/testing/selftests/bpf/progs/bpf_loop.c | 112 ++++++++++++++++ + 2 files changed, 257 insertions(+) + create mode 100644 tools/testing/selftests/bpf/prog_tests/bpf_loop.c + create mode 100644 tools/testing/selftests/bpf/progs/bpf_loop.c + +--- /dev/null ++++ b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c +@@ -0,0 +1,145 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Facebook */ ++ ++#include ++#include ++#include "bpf_loop.skel.h" ++ ++static void check_nr_loops(struct bpf_loop *skel) ++{ ++ struct bpf_link *link; ++ ++ link = bpf_program__attach(skel->progs.test_prog); ++ if (!ASSERT_OK_PTR(link, "link")) ++ return; ++ ++ /* test 0 loops */ ++ skel->bss->nr_loops = 0; ++ ++ usleep(1); ++ ++ ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops, ++ "0 loops"); ++ ++ /* test 500 loops */ ++ skel->bss->nr_loops = 500; ++ ++ usleep(1); ++ ++ ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops, ++ "500 loops"); ++ ASSERT_EQ(skel->bss->g_output, (500 * 499) / 2, "g_output"); ++ ++ /* test exceeding the max limit */ ++ skel->bss->nr_loops = -1; ++ ++ usleep(1); ++ ++ ASSERT_EQ(skel->bss->err, -E2BIG, "over max limit"); ++ ++ bpf_link__destroy(link); ++} ++ ++static void check_callback_fn_stop(struct bpf_loop *skel) ++{ ++ struct bpf_link *link; ++ ++ link = bpf_program__attach(skel->progs.test_prog); ++ if (!ASSERT_OK_PTR(link, "link")) ++ return; ++ ++ /* testing that loop is stopped when callback_fn returns 1 */ ++ skel->bss->nr_loops = 400; ++ skel->data->stop_index = 50; ++ ++ usleep(1); ++ ++ ASSERT_EQ(skel->bss->nr_loops_returned, skel->data->stop_index + 1, ++ "nr_loops_returned"); ++ ASSERT_EQ(skel->bss->g_output, (50 * 49) / 2, ++ "g_output"); ++ ++ bpf_link__destroy(link); ++} ++ ++static void check_null_callback_ctx(struct bpf_loop *skel) ++{ ++ struct bpf_link *link; ++ ++ /* check that user is able to pass in a null callback_ctx */ ++ link = bpf_program__attach(skel->progs.prog_null_ctx); ++ if (!ASSERT_OK_PTR(link, "link")) ++ return; ++ ++ skel->bss->nr_loops = 10; ++ ++ usleep(1); ++ ++ ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops, ++ "nr_loops_returned"); ++ ++ 
bpf_link__destroy(link); ++} ++ ++static void check_invalid_flags(struct bpf_loop *skel) ++{ ++ struct bpf_link *link; ++ ++ /* check that passing in non-zero flags returns -EINVAL */ ++ link = bpf_program__attach(skel->progs.prog_invalid_flags); ++ if (!ASSERT_OK_PTR(link, "link")) ++ return; ++ ++ usleep(1); ++ ++ ASSERT_EQ(skel->bss->err, -EINVAL, "err"); ++ ++ bpf_link__destroy(link); ++} ++ ++static void check_nested_calls(struct bpf_loop *skel) ++{ ++ __u32 nr_loops = 100, nested_callback_nr_loops = 4; ++ struct bpf_link *link; ++ ++ /* check that nested calls are supported */ ++ link = bpf_program__attach(skel->progs.prog_nested_calls); ++ if (!ASSERT_OK_PTR(link, "link")) ++ return; ++ ++ skel->bss->nr_loops = nr_loops; ++ skel->bss->nested_callback_nr_loops = nested_callback_nr_loops; ++ ++ usleep(1); ++ ++ ASSERT_EQ(skel->bss->nr_loops_returned, nr_loops * nested_callback_nr_loops ++ * nested_callback_nr_loops, "nr_loops_returned"); ++ ASSERT_EQ(skel->bss->g_output, (4 * 3) / 2 * nested_callback_nr_loops ++ * nr_loops, "g_output"); ++ ++ bpf_link__destroy(link); ++} ++ ++void test_bpf_loop(void) ++{ ++ struct bpf_loop *skel; ++ ++ skel = bpf_loop__open_and_load(); ++ if (!ASSERT_OK_PTR(skel, "bpf_loop__open_and_load")) ++ return; ++ ++ skel->bss->pid = getpid(); ++ ++ if (test__start_subtest("check_nr_loops")) ++ check_nr_loops(skel); ++ if (test__start_subtest("check_callback_fn_stop")) ++ check_callback_fn_stop(skel); ++ if (test__start_subtest("check_null_callback_ctx")) ++ check_null_callback_ctx(skel); ++ if (test__start_subtest("check_invalid_flags")) ++ check_invalid_flags(skel); ++ if (test__start_subtest("check_nested_calls")) ++ check_nested_calls(skel); ++ ++ bpf_loop__destroy(skel); ++} +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/bpf_loop.c +@@ -0,0 +1,112 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Facebook */ ++ ++#include "vmlinux.h" ++#include ++ ++char _license[] SEC("license") = "GPL"; ++ ++struct callback_ctx { ++ int output; ++}; ++ ++/* These should be set by the user program */ ++u32 nested_callback_nr_loops; ++u32 stop_index = -1; ++u32 nr_loops; ++int pid; ++ ++/* Making these global variables so that the userspace program ++ * can verify the output through the skeleton ++ */ ++int nr_loops_returned; ++int g_output; ++int err; ++ ++static int callback(__u32 index, void *data) ++{ ++ struct callback_ctx *ctx = data; ++ ++ if (index >= stop_index) ++ return 1; ++ ++ ctx->output += index; ++ ++ return 0; ++} ++ ++static int empty_callback(__u32 index, void *data) ++{ ++ return 0; ++} ++ ++static int nested_callback2(__u32 index, void *data) ++{ ++ nr_loops_returned += bpf_loop(nested_callback_nr_loops, callback, data, 0); ++ ++ return 0; ++} ++ ++static int nested_callback1(__u32 index, void *data) ++{ ++ bpf_loop(nested_callback_nr_loops, nested_callback2, data, 0); ++ return 0; ++} ++ ++SEC("fentry/__x64_sys_nanosleep") ++int test_prog(void *ctx) ++{ ++ struct callback_ctx data = {}; ++ ++ if (bpf_get_current_pid_tgid() >> 32 != pid) ++ return 0; ++ ++ nr_loops_returned = bpf_loop(nr_loops, callback, &data, 0); ++ ++ if (nr_loops_returned < 0) ++ err = nr_loops_returned; ++ else ++ g_output = data.output; ++ ++ return 0; ++} ++ ++SEC("fentry/__x64_sys_nanosleep") ++int prog_null_ctx(void *ctx) ++{ ++ if (bpf_get_current_pid_tgid() >> 32 != pid) ++ return 0; ++ ++ nr_loops_returned = bpf_loop(nr_loops, empty_callback, NULL, 0); ++ ++ return 0; ++} ++ ++SEC("fentry/__x64_sys_nanosleep") ++int prog_invalid_flags(void *ctx) ++{ 
++ struct callback_ctx data = {}; ++ ++ if (bpf_get_current_pid_tgid() >> 32 != pid) ++ return 0; ++ ++ err = bpf_loop(nr_loops, callback, &data, 1); ++ ++ return 0; ++} ++ ++SEC("fentry/__x64_sys_nanosleep") ++int prog_nested_calls(void *ctx) ++{ ++ struct callback_ctx data = {}; ++ ++ if (bpf_get_current_pid_tgid() >> 32 != pid) ++ return 0; ++ ++ nr_loops_returned = 0; ++ bpf_loop(nr_loops, nested_callback1, &data, 0); ++ ++ g_output = data.output; ++ ++ return 0; ++} diff --git a/patches.suse/selftests-bpf-Add-btf_dedup-case-with-duplicated-str.patch b/patches.suse/selftests-bpf-Add-btf_dedup-case-with-duplicated-str.patch new file mode 100644 index 0000000..f3b64bc --- /dev/null +++ b/patches.suse/selftests-bpf-Add-btf_dedup-case-with-duplicated-str.patch @@ -0,0 +1,147 @@ +From: Jiri Olsa +Date: Wed, 17 Nov 2021 11:41:14 -0800 +Subject: selftests/bpf: Add btf_dedup case with duplicated structs within CU +Patch-mainline: v5.17-rc1 +Git-commit: 9a49afe6f5a516eb33bec24be0f81cb35ca79445 +References: jsc#PED-1368 + +Add an artificial minimal example simulating compilers producing two +different types within a single CU that correspond to identical struct +definitions. + +Signed-off-by: Jiri Olsa +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211117194114.347675-2-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c | 113 +++++++++++++++ + 1 file changed, 113 insertions(+) + +--- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c +@@ -314,6 +314,117 @@ cleanup: + btf__free(btf1); + } + ++static void btf_add_dup_struct_in_cu(struct btf *btf, int start_id) ++{ ++#define ID(n) (start_id + n) ++ btf__set_pointer_size(btf, 8); /* enforce 64-bit arch */ ++ ++ btf__add_int(btf, "int", 4, BTF_INT_SIGNED); /* [1] int */ ++ ++ btf__add_struct(btf, "s", 8); /* [2] struct s { */ ++ btf__add_field(btf, "a", ID(3), 0, 0); /* struct anon a; */ ++ btf__add_field(btf, "b", ID(4), 0, 0); /* struct anon b; */ ++ /* } */ ++ ++ btf__add_struct(btf, "(anon)", 8); /* [3] struct anon { */ ++ btf__add_field(btf, "f1", ID(1), 0, 0); /* int f1; */ ++ btf__add_field(btf, "f2", ID(1), 32, 0); /* int f2; */ ++ /* } */ ++ ++ btf__add_struct(btf, "(anon)", 8); /* [4] struct anon { */ ++ btf__add_field(btf, "f1", ID(1), 0, 0); /* int f1; */ ++ btf__add_field(btf, "f2", ID(1), 32, 0); /* int f2; */ ++ /* } */ ++#undef ID ++} ++ ++static void test_split_dup_struct_in_cu() ++{ ++ struct btf *btf1, *btf2; ++ int err; ++ ++ /* generate the base data.. */ ++ btf1 = btf__new_empty(); ++ if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) ++ return; ++ ++ btf_add_dup_struct_in_cu(btf1, 0); ++ ++ VALIDATE_RAW_BTF( ++ btf1, ++ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", ++ "[2] STRUCT 's' size=8 vlen=2\n" ++ "\t'a' type_id=3 bits_offset=0\n" ++ "\t'b' type_id=4 bits_offset=0", ++ "[3] STRUCT '(anon)' size=8 vlen=2\n" ++ "\t'f1' type_id=1 bits_offset=0\n" ++ "\t'f2' type_id=1 bits_offset=32", ++ "[4] STRUCT '(anon)' size=8 vlen=2\n" ++ "\t'f1' type_id=1 bits_offset=0\n" ++ "\t'f2' type_id=1 bits_offset=32"); ++ ++ /* ..dedup them... 
*/ ++ err = btf__dedup(btf1, NULL, NULL); ++ if (!ASSERT_OK(err, "btf_dedup")) ++ goto cleanup; ++ ++ VALIDATE_RAW_BTF( ++ btf1, ++ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", ++ "[2] STRUCT 's' size=8 vlen=2\n" ++ "\t'a' type_id=3 bits_offset=0\n" ++ "\t'b' type_id=3 bits_offset=0", ++ "[3] STRUCT '(anon)' size=8 vlen=2\n" ++ "\t'f1' type_id=1 bits_offset=0\n" ++ "\t'f2' type_id=1 bits_offset=32"); ++ ++ /* and add the same data on top of it */ ++ btf2 = btf__new_empty_split(btf1); ++ if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) ++ goto cleanup; ++ ++ btf_add_dup_struct_in_cu(btf2, 3); ++ ++ VALIDATE_RAW_BTF( ++ btf2, ++ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", ++ "[2] STRUCT 's' size=8 vlen=2\n" ++ "\t'a' type_id=3 bits_offset=0\n" ++ "\t'b' type_id=3 bits_offset=0", ++ "[3] STRUCT '(anon)' size=8 vlen=2\n" ++ "\t'f1' type_id=1 bits_offset=0\n" ++ "\t'f2' type_id=1 bits_offset=32", ++ "[4] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", ++ "[5] STRUCT 's' size=8 vlen=2\n" ++ "\t'a' type_id=6 bits_offset=0\n" ++ "\t'b' type_id=7 bits_offset=0", ++ "[6] STRUCT '(anon)' size=8 vlen=2\n" ++ "\t'f1' type_id=4 bits_offset=0\n" ++ "\t'f2' type_id=4 bits_offset=32", ++ "[7] STRUCT '(anon)' size=8 vlen=2\n" ++ "\t'f1' type_id=4 bits_offset=0\n" ++ "\t'f2' type_id=4 bits_offset=32"); ++ ++ err = btf__dedup(btf2, NULL, NULL); ++ if (!ASSERT_OK(err, "btf_dedup")) ++ goto cleanup; ++ ++ /* after dedup it should match the original data */ ++ VALIDATE_RAW_BTF( ++ btf2, ++ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", ++ "[2] STRUCT 's' size=8 vlen=2\n" ++ "\t'a' type_id=3 bits_offset=0\n" ++ "\t'b' type_id=3 bits_offset=0", ++ "[3] STRUCT '(anon)' size=8 vlen=2\n" ++ "\t'f1' type_id=1 bits_offset=0\n" ++ "\t'f2' type_id=1 bits_offset=32"); ++ ++cleanup: ++ btf__free(btf2); ++ btf__free(btf1); ++} ++ + void test_btf_dedup_split() + { + if (test__start_subtest("split_simple")) +@@ -322,4 +433,6 @@ void test_btf_dedup_split() + test_split_struct_duped(); + if (test__start_subtest("split_fwd_resolve")) + test_split_fwd_resolve(); ++ if (test__start_subtest("split_dup_struct_in_cu")) ++ test_split_dup_struct_in_cu(); + } diff --git a/patches.suse/selftests-bpf-Add-btf_dump__new-to-test_cpp.patch b/patches.suse/selftests-bpf-Add-btf_dump__new-to-test_cpp.patch new file mode 100644 index 0000000..24116a5 --- /dev/null +++ b/patches.suse/selftests-bpf-Add-btf_dump__new-to-test_cpp.patch @@ -0,0 +1,47 @@ +From: Jiri Olsa +Date: Thu, 23 Dec 2021 14:17:36 +0100 +Subject: selftests/bpf: Add btf_dump__new to test_cpp +Patch-mainline: v5.17-rc1 +Git-commit: ecf45e60a62dfeb65658abac02f0bdb45b786911 +References: jsc#PED-1368 + +Adding btf_dump__new call to test_cpp, so we can +test C++ compilation with that. 
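+
+(At this point btf_dump__new() is not a plain function but an
+overload-selecting macro in libbpf, roughly
+
+	#define btf_dump__new(...) ___libbpf_overload(___btf_dump_new, __VA_ARGS__)
+
+and such macros are exactly the kind of construct that can break under
+C++, which is presumably why the call is added to this link-only test.)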
+ +Signed-off-by: Jiri Olsa +Signed-off-by: Andrii Nakryiko +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20211223131736.483956-2-jolsa@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/test_cpp.cpp | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/tools/testing/selftests/bpf/test_cpp.cpp ++++ b/tools/testing/selftests/bpf/test_cpp.cpp +@@ -7,9 +7,15 @@ + + /* do nothing, just make sure we can link successfully */ + ++static void dump_printf(void *ctx, const char *fmt, va_list args) ++{ ++} ++ + int main(int argc, char *argv[]) + { ++ struct btf_dump_opts opts = { }; + struct test_core_extern *skel; ++ struct btf *btf; + + /* libbpf.h */ + libbpf_set_print(NULL); +@@ -18,7 +24,8 @@ int main(int argc, char *argv[]) + bpf_prog_get_fd_by_id(0); + + /* btf.h */ +- btf__new(NULL, 0); ++ btf = btf__new(NULL, 0); ++ btf_dump__new(btf, dump_printf, nullptr, &opts); + + /* BPF skeleton */ + skel = test_core_extern__open_and_load(); diff --git a/patches.suse/selftests-bpf-Add-exception-handling-selftests-for-t.patch b/patches.suse/selftests-bpf-Add-exception-handling-selftests-for-t.patch new file mode 100644 index 0000000..cbf7822 --- /dev/null +++ b/patches.suse/selftests-bpf-Add-exception-handling-selftests-for-t.patch @@ -0,0 +1,122 @@ +From: Alan Maguire +Date: Fri, 5 Nov 2021 16:50:46 +0000 +Subject: selftests/bpf: Add exception handling selftests for tp_bpf program +Patch-mainline: v5.17-rc1 +Git-commit: c23551c9c36ae394f9c53a5adf1944a943c65e0b +References: jsc#PED-1368 + +Exception handling is triggered in BPF tracing programs when a NULL pointer +is dereferenced; the exception handler zeroes the target register and +execution of the BPF program progresses. + +To test exception handling then, we need to trigger a NULL pointer dereference +for a field which should never be zero; if it is, the only explanation is the +exception handler ran. task->task_works is the NULL pointer chosen (for a new +task from fork() no work is associated), and the task_works->func field should +not be zero if task_works is non-NULL. The test verifies that task_works and +task_works->func are 0. + +Signed-off-by: Alan Maguire +Signed-off-by: Daniel Borkmann +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/1636131046-5982-3-git-send-email-alan.maguire@oracle.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/exhandler.c | 43 +++++++++++++++++++++ + tools/testing/selftests/bpf/progs/exhandler_kern.c | 43 +++++++++++++++++++++ + 2 files changed, 86 insertions(+) + create mode 100644 tools/testing/selftests/bpf/prog_tests/exhandler.c + create mode 100644 tools/testing/selftests/bpf/progs/exhandler_kern.c + +--- /dev/null ++++ b/tools/testing/selftests/bpf/prog_tests/exhandler.c +@@ -0,0 +1,43 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021, Oracle and/or its affiliates. */ ++ ++#include ++ ++/* Test that verifies exception handling is working. fork() ++ * triggers task_newtask tracepoint; that new task will have a ++ * NULL pointer task_works, and the associated task->task_works->func ++ * should not be NULL if task_works itself is non-NULL. ++ * ++ * So to verify exception handling we want to see a NULL task_works ++ * and task_works->func; if we see this we can conclude that the ++ * exception handler ran when we attempted to dereference task->task_works ++ * and zeroed the destination register. 
++ */ ++#include "exhandler_kern.skel.h" ++ ++void test_exhandler(void) ++{ ++ int err = 0, duration = 0, status; ++ struct exhandler_kern *skel; ++ pid_t cpid; ++ ++ skel = exhandler_kern__open_and_load(); ++ if (CHECK(!skel, "skel_load", "skeleton failed: %d\n", err)) ++ goto cleanup; ++ ++ skel->bss->test_pid = getpid(); ++ ++ err = exhandler_kern__attach(skel); ++ if (!ASSERT_OK(err, "attach")) ++ goto cleanup; ++ cpid = fork(); ++ if (!ASSERT_GT(cpid, -1, "fork failed")) ++ goto cleanup; ++ if (cpid == 0) ++ _exit(0); ++ waitpid(cpid, &status, 0); ++ ++ ASSERT_NEQ(skel->bss->exception_triggered, 0, "verify exceptions occurred"); ++cleanup: ++ exhandler_kern__destroy(skel); ++} +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/exhandler_kern.c +@@ -0,0 +1,43 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021, Oracle and/or its affiliates. */ ++ ++#include "vmlinux.h" ++ ++#include ++#include ++#include ++ ++char _license[] SEC("license") = "GPL"; ++ ++unsigned int exception_triggered; ++int test_pid; ++ ++/* TRACE_EVENT(task_newtask, ++ * TP_PROTO(struct task_struct *p, u64 clone_flags) ++ */ ++SEC("tp_btf/task_newtask") ++int BPF_PROG(trace_task_newtask, struct task_struct *task, u64 clone_flags) ++{ ++ int pid = bpf_get_current_pid_tgid() >> 32; ++ struct callback_head *work; ++ void *func; ++ ++ if (test_pid != pid) ++ return 0; ++ ++ /* To verify we hit an exception we dereference task->task_works->func. ++ * If task work has been added, ++ * - task->task_works is non-NULL; and ++ * - task->task_works->func is non-NULL also (the callback function ++ * must be specified for the task work. ++ * ++ * However, for a newly-created task, task->task_works is NULLed, ++ * so we know the exception handler triggered if task_works is ++ * NULL and func is NULL. ++ */ ++ work = task->task_works; ++ func = work->func; ++ if (!work && !func) ++ exception_triggered++; ++ return 0; ++} diff --git a/patches.suse/selftests-bpf-Add-libbpf-feature-probing-API-selftes.patch b/patches.suse/selftests-bpf-Add-libbpf-feature-probing-API-selftes.patch new file mode 100644 index 0000000..f3ea9cd --- /dev/null +++ b/patches.suse/selftests-bpf-Add-libbpf-feature-probing-API-selftes.patch @@ -0,0 +1,168 @@ +From: Andrii Nakryiko +Date: Fri, 17 Dec 2021 09:12:01 -0800 +Subject: selftests/bpf: Add libbpf feature-probing API selftests +Patch-mainline: v5.17-rc1 +Git-commit: 5a8ea82f9d25e88e502d1c3a1a9ba639f69a63c0 +References: jsc#PED-1368 + +Add selftests for prog/map/prog+helper feature probing APIs. Prog and +map selftests are designed in such a way that they will always test all +the possible prog/map types, based on running kernel's vmlinux BTF enum +definition. This way we'll always be sure that when adding new BPF +program types or map types, libbpf will be always updated accordingly to +be able to feature-detect them. + +BPF prog_helper selftest will have to be manually extended with +interesting and important prog+helper combinations, it's easy, but can't +be completely automated. 
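+
+For reference, a minimal sketch of the three probing entry points as
+called from application code (the XDP/ringbuf/helper picks are
+arbitrary illustrations, not part of the selftest):
+
+  #include <bpf/libbpf.h>
+
+  int main(void)
+  {
+          /* each probe returns 1 if supported, 0 if not, <0 on error */
+          int p = libbpf_probe_bpf_prog_type(BPF_PROG_TYPE_XDP, NULL);
+          int m = libbpf_probe_bpf_map_type(BPF_MAP_TYPE_RINGBUF, NULL);
+          int h = libbpf_probe_bpf_helper(BPF_PROG_TYPE_XDP,
+                                          BPF_FUNC_map_lookup_elem, NULL);
+
+          return (p < 0 || m < 0 || h < 0);
+  }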
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Acked-by: Dave Marchevsky +Link: https://lore.kernel.org/bpf/20211217171202.3352835-3-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/config | 2 + tools/testing/selftests/bpf/prog_tests/libbpf_probes.c | 124 +++++++++++++++++ + 2 files changed, 126 insertions(+) + create mode 100644 tools/testing/selftests/bpf/prog_tests/libbpf_probes.c + +--- a/tools/testing/selftests/bpf/config ++++ b/tools/testing/selftests/bpf/config +@@ -38,7 +38,9 @@ CONFIG_IPV6_SIT=m + CONFIG_BPF_JIT=y + CONFIG_BPF_LSM=y + CONFIG_SECURITY=y ++CONFIG_RC_CORE=y + CONFIG_LIRC=y ++CONFIG_BPF_LIRC_MODE2=y + CONFIG_IMA=y + CONFIG_SECURITYFS=y + CONFIG_IMA_WRITE_POLICY=y +--- /dev/null ++++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c +@@ -0,0 +1,124 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* Copyright (c) 2021 Facebook */ ++ ++#include ++#include ++ ++void test_libbpf_probe_prog_types(void) ++{ ++ struct btf *btf; ++ const struct btf_type *t; ++ const struct btf_enum *e; ++ int i, n, id; ++ ++ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); ++ if (!ASSERT_OK_PTR(btf, "btf_parse")) ++ return; ++ ++ /* find enum bpf_prog_type and enumerate each value */ ++ id = btf__find_by_name_kind(btf, "bpf_prog_type", BTF_KIND_ENUM); ++ if (!ASSERT_GT(id, 0, "bpf_prog_type_id")) ++ goto cleanup; ++ t = btf__type_by_id(btf, id); ++ if (!ASSERT_OK_PTR(t, "bpf_prog_type_enum")) ++ goto cleanup; ++ ++ for (e = btf_enum(t), i = 0, n = btf_vlen(t); i < n; e++, i++) { ++ const char *prog_type_name = btf__str_by_offset(btf, e->name_off); ++ enum bpf_prog_type prog_type = (enum bpf_prog_type)e->val; ++ int res; ++ ++ if (prog_type == BPF_PROG_TYPE_UNSPEC) ++ continue; ++ ++ if (!test__start_subtest(prog_type_name)) ++ continue; ++ ++ res = libbpf_probe_bpf_prog_type(prog_type, NULL); ++ ASSERT_EQ(res, 1, prog_type_name); ++ } ++ ++cleanup: ++ btf__free(btf); ++} ++ ++void test_libbpf_probe_map_types(void) ++{ ++ struct btf *btf; ++ const struct btf_type *t; ++ const struct btf_enum *e; ++ int i, n, id; ++ ++ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); ++ if (!ASSERT_OK_PTR(btf, "btf_parse")) ++ return; ++ ++ /* find enum bpf_map_type and enumerate each value */ ++ id = btf__find_by_name_kind(btf, "bpf_map_type", BTF_KIND_ENUM); ++ if (!ASSERT_GT(id, 0, "bpf_map_type_id")) ++ goto cleanup; ++ t = btf__type_by_id(btf, id); ++ if (!ASSERT_OK_PTR(t, "bpf_map_type_enum")) ++ goto cleanup; ++ ++ for (e = btf_enum(t), i = 0, n = btf_vlen(t); i < n; e++, i++) { ++ const char *map_type_name = btf__str_by_offset(btf, e->name_off); ++ enum bpf_map_type map_type = (enum bpf_map_type)e->val; ++ int res; ++ ++ if (map_type == BPF_MAP_TYPE_UNSPEC) ++ continue; ++ ++ if (!test__start_subtest(map_type_name)) ++ continue; ++ ++ res = libbpf_probe_bpf_map_type(map_type, NULL); ++ ASSERT_EQ(res, 1, map_type_name); ++ } ++ ++cleanup: ++ btf__free(btf); ++} ++ ++void test_libbpf_probe_helpers(void) ++{ ++#define CASE(prog, helper, supp) { \ ++ .prog_type_name = "BPF_PROG_TYPE_" # prog, \ ++ .helper_name = "bpf_" # helper, \ ++ .prog_type = BPF_PROG_TYPE_ ## prog, \ ++ .helper_id = BPF_FUNC_ ## helper, \ ++ .supported = supp, \ ++} ++ const struct case_def { ++ const char *prog_type_name; ++ const char *helper_name; ++ enum bpf_prog_type prog_type; ++ enum bpf_func_id helper_id; ++ bool supported; ++ } cases[] = { ++ CASE(KPROBE, unspec, false), ++ CASE(KPROBE, map_lookup_elem, true), ++ CASE(KPROBE, loop, true), ++ ++ CASE(KPROBE, 
ktime_get_coarse_ns, false), ++ CASE(SOCKET_FILTER, ktime_get_coarse_ns, true), ++ ++ CASE(KPROBE, sys_bpf, false), ++ CASE(SYSCALL, sys_bpf, true), ++ }; ++ size_t case_cnt = ARRAY_SIZE(cases), i; ++ char buf[128]; ++ ++ for (i = 0; i < case_cnt; i++) { ++ const struct case_def *d = &cases[i]; ++ int res; ++ ++ snprintf(buf, sizeof(buf), "%s+%s", d->prog_type_name, d->helper_name); ++ ++ if (!test__start_subtest(buf)) ++ continue; ++ ++ res = libbpf_probe_bpf_helper(d->prog_type, d->helper_id, NULL); ++ ASSERT_EQ(res, d->supported, buf); ++ } ++} diff --git a/patches.suse/selftests-bpf-Add-lskel-version-of-kfunc-test.patch b/patches.suse/selftests-bpf-Add-lskel-version-of-kfunc-test.patch new file mode 100644 index 0000000..a0df2b2 --- /dev/null +++ b/patches.suse/selftests-bpf-Add-lskel-version-of-kfunc-test.patch @@ -0,0 +1,75 @@ +From: Alexei Starovoitov +Date: Wed, 1 Dec 2021 10:10:35 -0800 +Subject: selftests/bpf: Add lskel version of kfunc test. +Patch-mainline: v5.17-rc1 +Git-commit: bc5f75da977b2a4d9aa6827081e6c2ddd3347328 +References: jsc#PED-1368 + +Add light skeleton version of kfunc_call_test_subprog test. + +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-13-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/Makefile | 2 - + tools/testing/selftests/bpf/prog_tests/kfunc_call.c | 24 ++++++++++++++++++++ + 2 files changed, 25 insertions(+), 1 deletion(-) + +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -327,7 +327,7 @@ LINKED_SKELS := test_static_linked.skel. + LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \ + test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c + # Generate both light skeleton and libbpf skeleton for these +-LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c ++LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test_subprog.c + SKEL_BLACKLIST += $$(LSKELS) + + test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o +--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c ++++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +@@ -4,6 +4,7 @@ + #include + #include "kfunc_call_test.lskel.h" + #include "kfunc_call_test_subprog.skel.h" ++#include "kfunc_call_test_subprog.lskel.h" + + static void test_main(void) + { +@@ -49,6 +50,26 @@ static void test_subprog(void) + kfunc_call_test_subprog__destroy(skel); + } + ++static void test_subprog_lskel(void) ++{ ++ struct kfunc_call_test_subprog_lskel *skel; ++ int prog_fd, retval, err; ++ ++ skel = kfunc_call_test_subprog_lskel__open_and_load(); ++ if (!ASSERT_OK_PTR(skel, "skel")) ++ return; ++ ++ prog_fd = skel->progs.kfunc_call_test1.prog_fd; ++ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), ++ NULL, NULL, (__u32 *)&retval, NULL); ++ ASSERT_OK(err, "bpf_prog_test_run(test1)"); ++ ASSERT_EQ(retval, 10, "test1-retval"); ++ ASSERT_NEQ(skel->data->active_res, -1, "active_res"); ++ ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res"); ++ ++ kfunc_call_test_subprog_lskel__destroy(skel); ++} ++ + void test_kfunc_call(void) + { + if (test__start_subtest("main")) +@@ -56,4 +77,7 @@ void test_kfunc_call(void) + + if (test__start_subtest("subprog")) + test_subprog(); ++ ++ if (test__start_subtest("subprog_lskel")) ++ test_subprog_lskel(); + } diff --git a/patches.suse/selftests-bpf-Add-test-cases-for-bpf_strncmp.patch 
b/patches.suse/selftests-bpf-Add-test-cases-for-bpf_strncmp.patch new file mode 100644 index 0000000..410034e --- /dev/null +++ b/patches.suse/selftests-bpf-Add-test-cases-for-bpf_strncmp.patch @@ -0,0 +1,251 @@ +From: Hou Tao +Date: Fri, 10 Dec 2021 22:16:52 +0800 +Subject: selftests/bpf: Add test cases for bpf_strncmp() +Patch-mainline: v5.17-rc1 +Git-commit: bdbee82beca4514496c52a2dc035f2a26f0c1b88 +References: jsc#PED-1368 + +Four test cases are added: +(1) ensure the return value is expected +(2) ensure no const string size is rejected +(3) ensure writable target is rejected +(4) ensure no null-terminated target is rejected + +Signed-off-by: Hou Tao +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211210141652.877186-5-houtao1@huawei.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/test_strncmp.c | 167 ++++++++++++++++++ + tools/testing/selftests/bpf/progs/strncmp_test.c | 54 +++++ + 2 files changed, 221 insertions(+) + create mode 100644 tools/testing/selftests/bpf/prog_tests/test_strncmp.c + create mode 100644 tools/testing/selftests/bpf/progs/strncmp_test.c + +--- /dev/null ++++ b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c +@@ -0,0 +1,167 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (C) 2021. Huawei Technologies Co., Ltd */ ++#include ++#include "strncmp_test.skel.h" ++ ++static int trigger_strncmp(const struct strncmp_test *skel) ++{ ++ int cmp; ++ ++ usleep(1); ++ ++ cmp = skel->bss->cmp_ret; ++ if (cmp > 0) ++ return 1; ++ if (cmp < 0) ++ return -1; ++ return 0; ++} ++ ++/* ++ * Compare str and target after making str[i] != target[i]. ++ * When exp is -1, make str[i] < target[i] and delta = -1. ++ */ ++static void strncmp_full_str_cmp(struct strncmp_test *skel, const char *name, ++ int exp) ++{ ++ size_t nr = sizeof(skel->bss->str); ++ char *str = skel->bss->str; ++ int delta = exp; ++ int got; ++ size_t i; ++ ++ memcpy(str, skel->rodata->target, nr); ++ for (i = 0; i < nr - 1; i++) { ++ str[i] += delta; ++ ++ got = trigger_strncmp(skel); ++ ASSERT_EQ(got, exp, name); ++ ++ str[i] -= delta; ++ } ++} ++ ++static void test_strncmp_ret(void) ++{ ++ struct strncmp_test *skel; ++ struct bpf_program *prog; ++ int err, got; ++ ++ skel = strncmp_test__open(); ++ if (!ASSERT_OK_PTR(skel, "strncmp_test open")) ++ return; ++ ++ bpf_object__for_each_program(prog, skel->obj) ++ bpf_program__set_autoload(prog, false); ++ ++ bpf_program__set_autoload(skel->progs.do_strncmp, true); ++ ++ err = strncmp_test__load(skel); ++ if (!ASSERT_EQ(err, 0, "strncmp_test load")) ++ goto out; ++ ++ err = strncmp_test__attach(skel); ++ if (!ASSERT_EQ(err, 0, "strncmp_test attach")) ++ goto out; ++ ++ skel->bss->target_pid = getpid(); ++ ++ /* Empty str */ ++ skel->bss->str[0] = '\0'; ++ got = trigger_strncmp(skel); ++ ASSERT_EQ(got, -1, "strncmp: empty str"); ++ ++ /* Same string */ ++ memcpy(skel->bss->str, skel->rodata->target, sizeof(skel->bss->str)); ++ got = trigger_strncmp(skel); ++ ASSERT_EQ(got, 0, "strncmp: same str"); ++ ++ /* Not-null-termainted string */ ++ memcpy(skel->bss->str, skel->rodata->target, sizeof(skel->bss->str)); ++ skel->bss->str[sizeof(skel->bss->str) - 1] = 'A'; ++ got = trigger_strncmp(skel); ++ ASSERT_EQ(got, 1, "strncmp: not-null-term str"); ++ ++ strncmp_full_str_cmp(skel, "strncmp: less than", -1); ++ strncmp_full_str_cmp(skel, "strncmp: greater than", 1); ++out: ++ strncmp_test__destroy(skel); ++} ++ ++static void test_strncmp_bad_not_const_str_size(void) ++{ ++ struct strncmp_test *skel; ++ struct 
bpf_program *prog; ++ int err; ++ ++ skel = strncmp_test__open(); ++ if (!ASSERT_OK_PTR(skel, "strncmp_test open")) ++ return; ++ ++ bpf_object__for_each_program(prog, skel->obj) ++ bpf_program__set_autoload(prog, false); ++ ++ bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size, ++ true); ++ ++ err = strncmp_test__load(skel); ++ ASSERT_ERR(err, "strncmp_test load bad_not_const_str_size"); ++ ++ strncmp_test__destroy(skel); ++} ++ ++static void test_strncmp_bad_writable_target(void) ++{ ++ struct strncmp_test *skel; ++ struct bpf_program *prog; ++ int err; ++ ++ skel = strncmp_test__open(); ++ if (!ASSERT_OK_PTR(skel, "strncmp_test open")) ++ return; ++ ++ bpf_object__for_each_program(prog, skel->obj) ++ bpf_program__set_autoload(prog, false); ++ ++ bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target, ++ true); ++ ++ err = strncmp_test__load(skel); ++ ASSERT_ERR(err, "strncmp_test load bad_writable_target"); ++ ++ strncmp_test__destroy(skel); ++} ++ ++static void test_strncmp_bad_not_null_term_target(void) ++{ ++ struct strncmp_test *skel; ++ struct bpf_program *prog; ++ int err; ++ ++ skel = strncmp_test__open(); ++ if (!ASSERT_OK_PTR(skel, "strncmp_test open")) ++ return; ++ ++ bpf_object__for_each_program(prog, skel->obj) ++ bpf_program__set_autoload(prog, false); ++ ++ bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target, ++ true); ++ ++ err = strncmp_test__load(skel); ++ ASSERT_ERR(err, "strncmp_test load bad_not_null_term_target"); ++ ++ strncmp_test__destroy(skel); ++} ++ ++void test_test_strncmp(void) ++{ ++ if (test__start_subtest("strncmp_ret")) ++ test_strncmp_ret(); ++ if (test__start_subtest("strncmp_bad_not_const_str_size")) ++ test_strncmp_bad_not_const_str_size(); ++ if (test__start_subtest("strncmp_bad_writable_target")) ++ test_strncmp_bad_writable_target(); ++ if (test__start_subtest("strncmp_bad_not_null_term_target")) ++ test_strncmp_bad_not_null_term_target(); ++} +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/strncmp_test.c +@@ -0,0 +1,54 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (C) 2021. 
Huawei Technologies Co., Ltd */ ++#include ++#include ++#include ++#include ++#include ++ ++#define STRNCMP_STR_SZ 8 ++ ++const char target[STRNCMP_STR_SZ] = "EEEEEEE"; ++char str[STRNCMP_STR_SZ]; ++int cmp_ret = 0; ++int target_pid = 0; ++ ++const char no_str_target[STRNCMP_STR_SZ] = "12345678"; ++char writable_target[STRNCMP_STR_SZ]; ++unsigned int no_const_str_size = STRNCMP_STR_SZ; ++ ++char _license[] SEC("license") = "GPL"; ++ ++SEC("tp/syscalls/sys_enter_nanosleep") ++int do_strncmp(void *ctx) ++{ ++ if ((bpf_get_current_pid_tgid() >> 32) != target_pid) ++ return 0; ++ ++ cmp_ret = bpf_strncmp(str, STRNCMP_STR_SZ, target); ++ return 0; ++} ++ ++SEC("tp/syscalls/sys_enter_nanosleep") ++int strncmp_bad_not_const_str_size(void *ctx) ++{ ++ /* The value of string size is not const, so will fail */ ++ cmp_ret = bpf_strncmp(str, no_const_str_size, target); ++ return 0; ++} ++ ++SEC("tp/syscalls/sys_enter_nanosleep") ++int strncmp_bad_writable_target(void *ctx) ++{ ++ /* Compared target is not read-only, so will fail */ ++ cmp_ret = bpf_strncmp(str, STRNCMP_STR_SZ, writable_target); ++ return 0; ++} ++ ++SEC("tp/syscalls/sys_enter_nanosleep") ++int strncmp_bad_not_null_term_target(void *ctx) ++{ ++ /* Compared target is not null-terminated, so will fail */ ++ cmp_ret = bpf_strncmp(str, STRNCMP_STR_SZ, no_str_target); ++ return 0; ++} diff --git a/patches.suse/selftests-bpf-Add-test-for-bpf_timer-overwriting-cra.patch b/patches.suse/selftests-bpf-Add-test-for-bpf_timer-overwriting-cra.patch new file mode 100644 index 0000000..cb0e1e8 --- /dev/null +++ b/patches.suse/selftests-bpf-Add-test-for-bpf_timer-overwriting-cra.patch @@ -0,0 +1,114 @@ +From: Kumar Kartikeya Dwivedi +Date: Wed, 9 Feb 2022 12:33:24 +0530 +Subject: selftests/bpf: Add test for bpf_timer overwriting crash +Patch-mainline: v5.17-rc6 +Git-commit: a7e75016a0753c24d6c995bc02501ae35368e333 +References: jsc#PED-1368 + +Add a test that validates that timer value is not overwritten when doing +a copy_map_value call in the kernel. Without the prior fix, this test +triggers a crash. 
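+
+For background, the window being tested is the normal timer lifecycle
+inside a map value. A BPF-side sketch of that lifecycle, assuming the
+map_elem/amap definitions from the test below (the callback body is
+irrelevant here and only illustrative):
+
+  static int timer_cb(void *map, int *key, struct map_elem *e)
+  {
+          return 0;
+  }
+
+  SEC("fentry/do_nanosleep")
+  int arm_timer(void *ctx)
+  {
+          int key = 0;
+          struct map_elem *e = bpf_map_lookup_elem(&amap, &key);
+
+          if (!e)
+                  return 0;
+          bpf_timer_init(&e->timer, &amap, 1 /* CLOCK_MONOTONIC */);
+          bpf_timer_set_callback(&e->timer, timer_cb);
+          bpf_timer_start(&e->timer, 0, 0);
+          /* a later bpf_map_update_elem() on this slot must not clobber
+           * the timer pointer the kernel now tracks */
+          return 0;
+  }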
+ +Signed-off-by: Kumar Kartikeya Dwivedi +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220209070324.1093182-3-memxor@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/timer_crash.c | 32 +++++++++++ + tools/testing/selftests/bpf/progs/timer_crash.c | 54 +++++++++++++++++++ + 2 files changed, 86 insertions(+) + create mode 100644 tools/testing/selftests/bpf/prog_tests/timer_crash.c + create mode 100644 tools/testing/selftests/bpf/progs/timer_crash.c + +--- /dev/null ++++ b/tools/testing/selftests/bpf/prog_tests/timer_crash.c +@@ -0,0 +1,32 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#include ++#include "timer_crash.skel.h" ++ ++enum { ++ MODE_ARRAY, ++ MODE_HASH, ++}; ++ ++static void test_timer_crash_mode(int mode) ++{ ++ struct timer_crash *skel; ++ ++ skel = timer_crash__open_and_load(); ++ if (!ASSERT_OK_PTR(skel, "timer_crash__open_and_load")) ++ return; ++ skel->bss->pid = getpid(); ++ skel->bss->crash_map = mode; ++ if (!ASSERT_OK(timer_crash__attach(skel), "timer_crash__attach")) ++ goto end; ++ usleep(1); ++end: ++ timer_crash__destroy(skel); ++} ++ ++void test_timer_crash(void) ++{ ++ if (test__start_subtest("array")) ++ test_timer_crash_mode(MODE_ARRAY); ++ if (test__start_subtest("hash")) ++ test_timer_crash_mode(MODE_HASH); ++} +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/timer_crash.c +@@ -0,0 +1,54 @@ ++// SPDX-License-Identifier: GPL-2.0 ++ ++#include ++#include ++#include ++ ++struct map_elem { ++ struct bpf_timer timer; ++ struct bpf_spin_lock lock; ++}; ++ ++struct { ++ __uint(type, BPF_MAP_TYPE_ARRAY); ++ __uint(max_entries, 1); ++ __type(key, int); ++ __type(value, struct map_elem); ++} amap SEC(".maps"); ++ ++struct { ++ __uint(type, BPF_MAP_TYPE_HASH); ++ __uint(max_entries, 1); ++ __type(key, int); ++ __type(value, struct map_elem); ++} hmap SEC(".maps"); ++ ++int pid = 0; ++int crash_map = 0; /* 0 for amap, 1 for hmap */ ++ ++SEC("fentry/do_nanosleep") ++int sys_enter(void *ctx) ++{ ++ struct map_elem *e, value = {}; ++ void *map = crash_map ? (void *)&hmap : (void *)&amap; ++ ++ if (bpf_get_current_task_btf()->tgid != pid) ++ return 0; ++ ++ *(void **)&value = (void *)0xdeadcaf3; ++ ++ bpf_map_update_elem(map, &(int){0}, &value, 0); ++ /* For array map, doing bpf_map_update_elem will do a ++ * check_and_free_timer_in_array, which will trigger the crash if timer ++ * pointer was overwritten, for hmap we need to use bpf_timer_cancel. ++ */ ++ if (crash_map == 1) { ++ e = bpf_map_lookup_elem(map, &(int){0}); ++ if (!e) ++ return 0; ++ bpf_timer_cancel(&e->timer); ++ } ++ return 0; ++} ++ ++char _license[] SEC("license") = "GPL"; diff --git a/patches.suse/selftests-bpf-Add-test-for-libbpf-s-custom-log_buf-b.patch b/patches.suse/selftests-bpf-Add-test-for-libbpf-s-custom-log_buf-b.patch new file mode 100644 index 0000000..8dea565 --- /dev/null +++ b/patches.suse/selftests-bpf-Add-test-for-libbpf-s-custom-log_buf-b.patch @@ -0,0 +1,328 @@ +From: Andrii Nakryiko +Date: Thu, 9 Dec 2021 11:38:38 -0800 +Subject: selftests/bpf: Add test for libbpf's custom log_buf behavior +Patch-mainline: v5.17-rc1 +Git-commit: 57e889269af3dd0609933e2550c4baee7a7eb84c +References: jsc#PED-1368 + +Add a selftest that validates that per-program and per-object log_buf +overrides work as expected. Also test same logic for low-level +bpf_prog_load() and bpf_btf_load() APIs. 
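+
+For reference, the override knobs this exercises, sketched from
+application code (my_obj/my_prog are placeholder skeleton names, not
+part of the test):
+
+  char obj_buf[64 * 1024], prog_buf[64 * 1024];
+  LIBBPF_OPTS(bpf_object_open_opts, opts,
+          .kernel_log_buf = obj_buf,
+          .kernel_log_size = sizeof(obj_buf),
+          .kernel_log_level = 1,
+  );
+  struct my_obj *skel = my_obj__open_opts(&opts);
+
+  /* a per-program buffer takes precedence over the object-level one */
+  bpf_program__set_log_buf(skel->progs.my_prog, prog_buf, sizeof(prog_buf));
+  bpf_program__set_log_level(skel->progs.my_prog, 2);
+  my_obj__load(skel);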
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211209193840.1248570-11-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/log_buf.c | 276 +++++++++++++++++++++++ + tools/testing/selftests/bpf/progs/test_log_buf.c | 24 ++ + 2 files changed, 300 insertions(+) + create mode 100644 tools/testing/selftests/bpf/prog_tests/log_buf.c + create mode 100644 tools/testing/selftests/bpf/progs/test_log_buf.c + +--- /dev/null ++++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c +@@ -0,0 +1,276 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Facebook */ ++ ++#include ++#include ++ ++#include "test_log_buf.skel.h" ++ ++static size_t libbpf_log_pos; ++static char libbpf_log_buf[1024 * 1024]; ++static bool libbpf_log_error; ++ ++static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt, va_list args) ++{ ++ int emitted_cnt; ++ size_t left_cnt; ++ ++ left_cnt = sizeof(libbpf_log_buf) - libbpf_log_pos; ++ emitted_cnt = vsnprintf(libbpf_log_buf + libbpf_log_pos, left_cnt, fmt, args); ++ ++ if (emitted_cnt < 0 || emitted_cnt + 1 > left_cnt) { ++ libbpf_log_error = true; ++ return 0; ++ } ++ ++ libbpf_log_pos += emitted_cnt; ++ return 0; ++} ++ ++static void obj_load_log_buf(void) ++{ ++ libbpf_print_fn_t old_print_cb = libbpf_set_print(libbpf_print_cb); ++ LIBBPF_OPTS(bpf_object_open_opts, opts); ++ const size_t log_buf_sz = 1024 * 1024; ++ struct test_log_buf* skel; ++ char *obj_log_buf, *good_log_buf, *bad_log_buf; ++ int err; ++ ++ obj_log_buf = malloc(3 * log_buf_sz); ++ if (!ASSERT_OK_PTR(obj_log_buf, "obj_log_buf")) ++ return; ++ ++ good_log_buf = obj_log_buf + log_buf_sz; ++ bad_log_buf = obj_log_buf + 2 * log_buf_sz; ++ obj_log_buf[0] = good_log_buf[0] = bad_log_buf[0] = '\0'; ++ ++ opts.kernel_log_buf = obj_log_buf; ++ opts.kernel_log_size = log_buf_sz; ++ opts.kernel_log_level = 4; /* for BTF this will turn into 1 */ ++ ++ /* In the first round every prog has its own log_buf, so libbpf logs ++ * don't have program failure logs ++ */ ++ skel = test_log_buf__open_opts(&opts); ++ if (!ASSERT_OK_PTR(skel, "skel_open")) ++ goto cleanup; ++ ++ /* set very verbose level for good_prog so we always get detailed logs */ ++ bpf_program__set_log_buf(skel->progs.good_prog, good_log_buf, log_buf_sz); ++ bpf_program__set_log_level(skel->progs.good_prog, 2); ++ ++ bpf_program__set_log_buf(skel->progs.bad_prog, bad_log_buf, log_buf_sz); ++ /* log_level 0 with custom log_buf means that verbose logs are not ++ * requested if program load is successful, but libbpf should retry ++ * with log_level 1 on error and put program's verbose load log into ++ * custom log_buf ++ */ ++ bpf_program__set_log_level(skel->progs.bad_prog, 0); ++ ++ err = test_log_buf__load(skel); ++ if (!ASSERT_ERR(err, "unexpected_load_success")) ++ goto cleanup; ++ ++ ASSERT_FALSE(libbpf_log_error, "libbpf_log_error"); ++ ++ /* there should be no prog loading log because we specified per-prog log buf */ ++ ASSERT_NULL(strstr(libbpf_log_buf, "-- BEGIN PROG LOAD LOG --"), "unexp_libbpf_log"); ++ ASSERT_OK_PTR(strstr(libbpf_log_buf, "prog 'bad_prog': BPF program load failed"), ++ "libbpf_log_not_empty"); ++ ASSERT_OK_PTR(strstr(obj_log_buf, "DATASEC license"), "obj_log_not_empty"); ++ ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx(id=0,off=0,imm=0) R10=fp0"), ++ "good_log_verbose"); ++ ASSERT_OK_PTR(strstr(bad_log_buf, "invalid access to map value, value_size=16 off=16000 size=4"), ++ "bad_log_not_empty"); ++ ++ if 
(env.verbosity > VERBOSE_NONE) { ++ printf("LIBBPF LOG: \n=================\n%s=================\n", libbpf_log_buf); ++ printf("OBJ LOG: \n=================\n%s=================\n", obj_log_buf); ++ printf("GOOD_PROG LOG:\n=================\n%s=================\n", good_log_buf); ++ printf("BAD_PROG LOG:\n=================\n%s=================\n", bad_log_buf); ++ } ++ ++ /* reset everything */ ++ test_log_buf__destroy(skel); ++ obj_log_buf[0] = good_log_buf[0] = bad_log_buf[0] = '\0'; ++ libbpf_log_buf[0] = '\0'; ++ libbpf_log_pos = 0; ++ libbpf_log_error = false; ++ ++ /* In the second round we let bad_prog's failure be logged through print callback */ ++ opts.kernel_log_buf = NULL; /* let everything through into print callback */ ++ opts.kernel_log_size = 0; ++ opts.kernel_log_level = 1; ++ ++ skel = test_log_buf__open_opts(&opts); ++ if (!ASSERT_OK_PTR(skel, "skel_open")) ++ goto cleanup; ++ ++ /* set normal verbose level for good_prog to check log_level is taken into account */ ++ bpf_program__set_log_buf(skel->progs.good_prog, good_log_buf, log_buf_sz); ++ bpf_program__set_log_level(skel->progs.good_prog, 1); ++ ++ err = test_log_buf__load(skel); ++ if (!ASSERT_ERR(err, "unexpected_load_success")) ++ goto cleanup; ++ ++ ASSERT_FALSE(libbpf_log_error, "libbpf_log_error"); ++ ++ /* this time prog loading error should be logged through print callback */ ++ ASSERT_OK_PTR(strstr(libbpf_log_buf, "libbpf: prog 'bad_prog': -- BEGIN PROG LOAD LOG --"), ++ "libbpf_log_correct"); ++ ASSERT_STREQ(obj_log_buf, "", "obj_log__empty"); ++ ASSERT_STREQ(good_log_buf, "processed 4 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n", ++ "good_log_ok"); ++ ASSERT_STREQ(bad_log_buf, "", "bad_log_empty"); ++ ++ if (env.verbosity > VERBOSE_NONE) { ++ printf("LIBBPF LOG: \n=================\n%s=================\n", libbpf_log_buf); ++ printf("OBJ LOG: \n=================\n%s=================\n", obj_log_buf); ++ printf("GOOD_PROG LOG:\n=================\n%s=================\n", good_log_buf); ++ printf("BAD_PROG LOG:\n=================\n%s=================\n", bad_log_buf); ++ } ++ ++cleanup: ++ free(obj_log_buf); ++ test_log_buf__destroy(skel); ++ libbpf_set_print(old_print_cb); ++} ++ ++static void bpf_prog_load_log_buf(void) ++{ ++ const struct bpf_insn good_prog_insns[] = { ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }; ++ const size_t good_prog_insn_cnt = sizeof(good_prog_insns) / sizeof(struct bpf_insn); ++ const struct bpf_insn bad_prog_insns[] = { ++ BPF_EXIT_INSN(), ++ }; ++ size_t bad_prog_insn_cnt = sizeof(bad_prog_insns) / sizeof(struct bpf_insn); ++ LIBBPF_OPTS(bpf_prog_load_opts, opts); ++ const size_t log_buf_sz = 1024 * 1024; ++ char *log_buf; ++ int fd = -1; ++ ++ log_buf = malloc(log_buf_sz); ++ if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc")) ++ return; ++ opts.log_buf = log_buf; ++ opts.log_size = log_buf_sz; ++ ++ /* with log_level == 0 log_buf shoud stay empty for good prog */ ++ log_buf[0] = '\0'; ++ opts.log_level = 0; ++ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL", ++ good_prog_insns, good_prog_insn_cnt, &opts); ++ ASSERT_STREQ(log_buf, "", "good_log_0"); ++ ASSERT_GE(fd, 0, "good_fd1"); ++ if (fd >= 0) ++ close(fd); ++ fd = -1; ++ ++ /* log_level == 2 should always fill log_buf, even for good prog */ ++ log_buf[0] = '\0'; ++ opts.log_level = 2; ++ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL", ++ good_prog_insns, good_prog_insn_cnt, &opts); ++ ASSERT_OK_PTR(strstr(log_buf, "0: 
R1=ctx(id=0,off=0,imm=0) R10=fp0"), "good_log_2"); ++ ASSERT_GE(fd, 0, "good_fd2"); ++ if (fd >= 0) ++ close(fd); ++ fd = -1; ++ ++ /* log_level == 0 should fill log_buf for bad prog */ ++ log_buf[0] = '\0'; ++ opts.log_level = 0; ++ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "bad_prog", "GPL", ++ bad_prog_insns, bad_prog_insn_cnt, &opts); ++ ASSERT_OK_PTR(strstr(log_buf, "R0 !read_ok"), "bad_log_0"); ++ ASSERT_LT(fd, 0, "bad_fd"); ++ if (fd >= 0) ++ close(fd); ++ fd = -1; ++ ++ free(log_buf); ++} ++ ++static void bpf_btf_load_log_buf(void) ++{ ++ LIBBPF_OPTS(bpf_btf_load_opts, opts); ++ const size_t log_buf_sz = 1024 * 1024; ++ const void *raw_btf_data; ++ __u32 raw_btf_size; ++ struct btf *btf; ++ char *log_buf; ++ int fd = -1; ++ ++ btf = btf__new_empty(); ++ if (!ASSERT_OK_PTR(btf, "empty_btf")) ++ return; ++ ++ ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type"); ++ ++ raw_btf_data = btf__raw_data(btf, &raw_btf_size); ++ if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_good")) ++ goto cleanup; ++ ++ log_buf = malloc(log_buf_sz); ++ if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc")) ++ goto cleanup; ++ opts.log_buf = log_buf; ++ opts.log_size = log_buf_sz; ++ ++ /* with log_level == 0 log_buf shoud stay empty for good BTF */ ++ log_buf[0] = '\0'; ++ opts.log_level = 0; ++ fd = bpf_btf_load(raw_btf_data, raw_btf_size, &opts); ++ ASSERT_STREQ(log_buf, "", "good_log_0"); ++ ASSERT_GE(fd, 0, "good_fd1"); ++ if (fd >= 0) ++ close(fd); ++ fd = -1; ++ ++ /* log_level == 2 should always fill log_buf, even for good BTF */ ++ log_buf[0] = '\0'; ++ opts.log_level = 2; ++ fd = bpf_btf_load(raw_btf_data, raw_btf_size, &opts); ++ printf("LOG_BUF: %s\n", log_buf); ++ ASSERT_OK_PTR(strstr(log_buf, "magic: 0xeb9f"), "good_log_2"); ++ ASSERT_GE(fd, 0, "good_fd2"); ++ if (fd >= 0) ++ close(fd); ++ fd = -1; ++ ++ /* make BTF bad, add pointer pointing to non-existing type */ ++ ASSERT_GT(btf__add_ptr(btf, 100), 0, "bad_ptr_type"); ++ ++ raw_btf_data = btf__raw_data(btf, &raw_btf_size); ++ if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_bad")) ++ goto cleanup; ++ ++ /* log_level == 0 should fill log_buf for bad BTF */ ++ log_buf[0] = '\0'; ++ opts.log_level = 0; ++ fd = bpf_btf_load(raw_btf_data, raw_btf_size, &opts); ++ printf("LOG_BUF: %s\n", log_buf); ++ ASSERT_OK_PTR(strstr(log_buf, "[2] PTR (anon) type_id=100 Invalid type_id"), "bad_log_0"); ++ ASSERT_LT(fd, 0, "bad_fd"); ++ if (fd >= 0) ++ close(fd); ++ fd = -1; ++ ++cleanup: ++ free(log_buf); ++ btf__free(btf); ++} ++ ++void test_log_buf(void) ++{ ++ if (test__start_subtest("obj_load_log_buf")) ++ obj_load_log_buf(); ++ if (test__start_subtest("bpf_prog_load_log_buf")) ++ bpf_prog_load_log_buf(); ++ if (test__start_subtest("bpf_btf_load_log_buf")) ++ bpf_btf_load_log_buf(); ++} +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/test_log_buf.c +@@ -0,0 +1,24 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Facebook */ ++ ++#include ++#include ++ ++int a[4]; ++const volatile int off = 4000; ++ ++SEC("raw_tp/sys_enter") ++int good_prog(const void *ctx) ++{ ++ a[0] = (int)(long)ctx; ++ return a[1]; ++} ++ ++SEC("raw_tp/sys_enter") ++int bad_prog(const void *ctx) ++{ ++ /* out of bounds access */ ++ return a[off]; ++} ++ ++char _license[] SEC("license") = "GPL"; diff --git a/patches.suse/selftests-bpf-Add-test-to-access-int-ptr-argument-in.patch b/patches.suse/selftests-bpf-Add-test-to-access-int-ptr-argument-in.patch new file mode 100644 index 0000000..9da9236 --- /dev/null +++ 
b/patches.suse/selftests-bpf-Add-test-to-access-int-ptr-argument-in.patch @@ -0,0 +1,38 @@ +From: Jiri Olsa +Date: Wed, 8 Dec 2021 20:32:42 +0100 +Subject: selftests/bpf: Add test to access int ptr argument in tracing program +Patch-mainline: v5.17-rc1 +Git-commit: 2b070c2bc885977ca8fe76ba8f6b9d73d8d20e39 +References: jsc#PED-1368 + +Adding verifier test for accessing int pointer argument in +tracing programs. + +The test program loads 2nd argument of bpf_modify_return_test +function which is int pointer and checks that verifier allows +that. + +Signed-off-by: Jiri Olsa +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211208193245.172141-3-jolsa@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/verifier/btf_ctx_access.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + create mode 100644 tools/testing/selftests/bpf/verifier/btf_ctx_access.c + +--- /dev/null ++++ b/tools/testing/selftests/bpf/verifier/btf_ctx_access.c +@@ -0,0 +1,12 @@ ++{ ++ "btf_ctx_access accept", ++ .insns = { ++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 8), /* load 2nd argument value (int pointer) */ ++ BPF_MOV64_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }, ++ .result = ACCEPT, ++ .prog_type = BPF_PROG_TYPE_TRACING, ++ .expected_attach_type = BPF_TRACE_FENTRY, ++ .kfunc = "bpf_modify_return_test", ++}, diff --git a/patches.suse/selftests-bpf-Add-tests-for-accessing-ingress_ifinde.patch b/patches.suse/selftests-bpf-Add-tests-for-accessing-ingress_ifinde.patch new file mode 100644 index 0000000..590aed8 --- /dev/null +++ b/patches.suse/selftests-bpf-Add-tests-for-accessing-ingress_ifinde.patch @@ -0,0 +1,127 @@ +From: Mark Pashmfouroush +Date: Wed, 10 Nov 2021 11:10:16 +0000 +Subject: selftests/bpf: Add tests for accessing ingress_ifindex in + bpf_sk_lookup +Patch-mainline: v5.17-rc1 +Git-commit: 8b4fd2bf1f47c3e3a63c327fca2ad5c4e2691ef8 +References: jsc#PED-1368 + +A new field was added to the bpf_sk_lookup data that users can access. +Add tests that validate that the new ingress_ifindex field contains the +right data. + +Signed-off-by: Mark Pashmfouroush +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211110111016.5670-3-markpash@cloudflare.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/sk_lookup.c | 31 ++++++++++++++++++ + tools/testing/selftests/bpf/progs/test_sk_lookup.c | 8 ++++ + tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c | 32 +++++++++++++++++++ + 3 files changed, 71 insertions(+) + +--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c ++++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +@@ -937,6 +937,37 @@ static void test_drop_on_lookup(struct t + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { EXT_IP6, INT_PORT }, + }, ++ /* The program will drop on success, meaning that the ifindex ++ * was 1. 
++ */ ++ { ++ .desc = "TCP IPv4 drop on valid ifindex", ++ .lookup_prog = skel->progs.check_ifindex, ++ .sotype = SOCK_STREAM, ++ .connect_to = { EXT_IP4, EXT_PORT }, ++ .listen_at = { EXT_IP4, EXT_PORT }, ++ }, ++ { ++ .desc = "TCP IPv6 drop on valid ifindex", ++ .lookup_prog = skel->progs.check_ifindex, ++ .sotype = SOCK_STREAM, ++ .connect_to = { EXT_IP6, EXT_PORT }, ++ .listen_at = { EXT_IP6, EXT_PORT }, ++ }, ++ { ++ .desc = "UDP IPv4 drop on valid ifindex", ++ .lookup_prog = skel->progs.check_ifindex, ++ .sotype = SOCK_DGRAM, ++ .connect_to = { EXT_IP4, EXT_PORT }, ++ .listen_at = { EXT_IP4, EXT_PORT }, ++ }, ++ { ++ .desc = "UDP IPv6 drop on valid ifindex", ++ .lookup_prog = skel->progs.check_ifindex, ++ .sotype = SOCK_DGRAM, ++ .connect_to = { EXT_IP6, EXT_PORT }, ++ .listen_at = { EXT_IP6, EXT_PORT }, ++ }, + }; + const struct test *t; + +--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c ++++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c +@@ -84,6 +84,14 @@ int lookup_drop(struct bpf_sk_lookup *ct + return SK_DROP; + } + ++SEC("sk_lookup") ++int check_ifindex(struct bpf_sk_lookup *ctx) ++{ ++ if (ctx->ingress_ifindex == 1) ++ return SK_DROP; ++ return SK_PASS; ++} ++ + SEC("sk_reuseport") + int reuseport_pass(struct sk_reuseport_md *ctx) + { +--- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c ++++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c +@@ -229,6 +229,24 @@ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, local_port)), + ++ /* 1-byte read from ingress_ifindex field */ ++ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, ++ offsetof(struct bpf_sk_lookup, ingress_ifindex)), ++ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, ++ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 1), ++ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, ++ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 2), ++ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, ++ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 3), ++ /* 2-byte read from ingress_ifindex field */ ++ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, ++ offsetof(struct bpf_sk_lookup, ingress_ifindex)), ++ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, ++ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 2), ++ /* 4-byte read from ingress_ifindex field */ ++ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, ++ offsetof(struct bpf_sk_lookup, ingress_ifindex)), ++ + /* 8-byte read from sk field */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, sk)), +@@ -345,6 +363,20 @@ + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, ++ .errstr = "invalid bpf_context access", ++ .result = REJECT, ++ .prog_type = BPF_PROG_TYPE_SK_LOOKUP, ++ .expected_attach_type = BPF_SK_LOOKUP, ++ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, ++}, ++{ ++ "invalid 8-byte read from bpf_sk_lookup ingress_ifindex field", ++ .insns = { ++ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, ++ offsetof(struct bpf_sk_lookup, ingress_ifindex)), ++ BPF_MOV32_IMM(BPF_REG_0, 0), ++ BPF_EXIT_INSN(), ++ }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, diff --git a/patches.suse/selftests-bpf-Add-tests-for-bpf_find_vma.patch b/patches.suse/selftests-bpf-Add-tests-for-bpf_find_vma.patch new file mode 100644 index 0000000..3ffe332 --- /dev/null +++ b/patches.suse/selftests-bpf-Add-tests-for-bpf_find_vma.patch @@ -0,0 +1,287 @@ +From: Song Liu +Date: Fri, 5 Nov 2021 16:23:30 -0700 +Subject: selftests/bpf: Add tests for bpf_find_vma +Patch-mainline: v5.17-rc1 +Git-commit: 
f108662b27c96cdadfadd39f0c0d650704cd593d +References: jsc#PED-1368 + +Add tests for bpf_find_vma in perf_event program and kprobe program. The +perf_event program is triggered from NMI context, so the second call of +bpf_find_vma() will return -EBUSY (irq_work busy). The kprobe program, +on the other hand, does not have this constraint. + +Also add tests for illegal writes to task or vma from the callback +function. The verifier should reject both cases. + +Signed-off-by: Song Liu +Signed-off-by: Alexei Starovoitov +Acked-by: Yonghong Song +Link: https://lore.kernel.org/bpf/20211105232330.1936330-3-songliubraving@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/find_vma.c | 117 +++++++++++++++++++++ + tools/testing/selftests/bpf/progs/find_vma.c | 69 ++++++++++++ + tools/testing/selftests/bpf/progs/find_vma_fail1.c | 29 +++++ + tools/testing/selftests/bpf/progs/find_vma_fail2.c | 29 +++++ + 4 files changed, 244 insertions(+) + create mode 100644 tools/testing/selftests/bpf/prog_tests/find_vma.c + create mode 100644 tools/testing/selftests/bpf/progs/find_vma.c + create mode 100644 tools/testing/selftests/bpf/progs/find_vma_fail1.c + create mode 100644 tools/testing/selftests/bpf/progs/find_vma_fail2.c + +--- /dev/null ++++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c +@@ -0,0 +1,117 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Facebook */ ++#include ++#include ++#include ++#include "find_vma.skel.h" ++#include "find_vma_fail1.skel.h" ++#include "find_vma_fail2.skel.h" ++ ++static void test_and_reset_skel(struct find_vma *skel, int expected_find_zero_ret) ++{ ++ ASSERT_EQ(skel->bss->found_vm_exec, 1, "found_vm_exec"); ++ ASSERT_EQ(skel->data->find_addr_ret, 0, "find_addr_ret"); ++ ASSERT_EQ(skel->data->find_zero_ret, expected_find_zero_ret, "find_zero_ret"); ++ ASSERT_OK_PTR(strstr(skel->bss->d_iname, "test_progs"), "find_test_progs"); ++ ++ skel->bss->found_vm_exec = 0; ++ skel->data->find_addr_ret = -1; ++ skel->data->find_zero_ret = -1; ++ skel->bss->d_iname[0] = 0; ++} ++ ++static int open_pe(void) ++{ ++ struct perf_event_attr attr = {0}; ++ int pfd; ++ ++ /* create perf event */ ++ attr.size = sizeof(attr); ++ attr.type = PERF_TYPE_HARDWARE; ++ attr.config = PERF_COUNT_HW_CPU_CYCLES; ++ attr.freq = 1; ++ attr.sample_freq = 4000; ++ pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); ++ ++ return pfd >= 0 ? 
pfd : -errno; ++} ++ ++static void test_find_vma_pe(struct find_vma *skel) ++{ ++ struct bpf_link *link = NULL; ++ volatile int j = 0; ++ int pfd, i; ++ ++ pfd = open_pe(); ++ if (pfd < 0) { ++ if (pfd == -ENOENT || pfd == -EOPNOTSUPP) { ++ printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); ++ test__skip(); ++ goto cleanup; ++ } ++ if (!ASSERT_GE(pfd, 0, "perf_event_open")) ++ goto cleanup; ++ } ++ ++ link = bpf_program__attach_perf_event(skel->progs.handle_pe, pfd); ++ if (!ASSERT_OK_PTR(link, "attach_perf_event")) ++ goto cleanup; ++ ++ for (i = 0; i < 1000000; ++i) ++ ++j; ++ ++ test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */); ++cleanup: ++ bpf_link__destroy(link); ++ close(pfd); ++} ++ ++static void test_find_vma_kprobe(struct find_vma *skel) ++{ ++ int err; ++ ++ err = find_vma__attach(skel); ++ if (!ASSERT_OK(err, "get_branch_snapshot__attach")) ++ return; ++ ++ getpgid(skel->bss->target_pid); ++ test_and_reset_skel(skel, -ENOENT /* could not find vma for ptr 0 */); ++} ++ ++static void test_illegal_write_vma(void) ++{ ++ struct find_vma_fail1 *skel; ++ ++ skel = find_vma_fail1__open_and_load(); ++ if (!ASSERT_ERR_PTR(skel, "find_vma_fail1__open_and_load")) ++ find_vma_fail1__destroy(skel); ++} ++ ++static void test_illegal_write_task(void) ++{ ++ struct find_vma_fail2 *skel; ++ ++ skel = find_vma_fail2__open_and_load(); ++ if (!ASSERT_ERR_PTR(skel, "find_vma_fail2__open_and_load")) ++ find_vma_fail2__destroy(skel); ++} ++ ++void serial_test_find_vma(void) ++{ ++ struct find_vma *skel; ++ ++ skel = find_vma__open_and_load(); ++ if (!ASSERT_OK_PTR(skel, "find_vma__open_and_load")) ++ return; ++ ++ skel->bss->target_pid = getpid(); ++ skel->bss->addr = (__u64)(uintptr_t)test_find_vma_pe; ++ ++ test_find_vma_pe(skel); ++ usleep(100000); /* allow the irq_work to finish */ ++ test_find_vma_kprobe(skel); ++ ++ find_vma__destroy(skel); ++ test_illegal_write_vma(); ++ test_illegal_write_task(); ++} +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/find_vma.c +@@ -0,0 +1,69 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Facebook */ ++#include "vmlinux.h" ++#include ++#include ++ ++char _license[] SEC("license") = "GPL"; ++ ++struct callback_ctx { ++ int dummy; ++}; ++ ++#define VM_EXEC 0x00000004 ++#define DNAME_INLINE_LEN 32 ++ ++pid_t target_pid = 0; ++char d_iname[DNAME_INLINE_LEN] = {0}; ++__u32 found_vm_exec = 0; ++__u64 addr = 0; ++int find_zero_ret = -1; ++int find_addr_ret = -1; ++ ++static long check_vma(struct task_struct *task, struct vm_area_struct *vma, ++ struct callback_ctx *data) ++{ ++ if (vma->vm_file) ++ bpf_probe_read_kernel_str(d_iname, DNAME_INLINE_LEN - 1, ++ vma->vm_file->f_path.dentry->d_iname); ++ ++ /* check for VM_EXEC */ ++ if (vma->vm_flags & VM_EXEC) ++ found_vm_exec = 1; ++ ++ return 0; ++} ++ ++SEC("raw_tp/sys_enter") ++int handle_getpid(void) ++{ ++ struct task_struct *task = bpf_get_current_task_btf(); ++ struct callback_ctx data = {}; ++ ++ if (task->pid != target_pid) ++ return 0; ++ ++ find_addr_ret = bpf_find_vma(task, addr, check_vma, &data, 0); ++ ++ /* this should return -ENOENT */ ++ find_zero_ret = bpf_find_vma(task, 0, check_vma, &data, 0); ++ return 0; ++} ++ ++SEC("perf_event") ++int handle_pe(void) ++{ ++ struct task_struct *task = bpf_get_current_task_btf(); ++ struct callback_ctx data = {}; ++ ++ if (task->pid != target_pid) ++ return 0; ++ ++ find_addr_ret = bpf_find_vma(task, addr, check_vma, &data, 0); ++ ++ /* In NMI, this should return -EBUSY, as the previous call is using ++ * the 
irq_work. ++ */ ++ find_zero_ret = bpf_find_vma(task, 0, check_vma, &data, 0); ++ return 0; ++} +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c +@@ -0,0 +1,29 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Facebook */ ++#include "vmlinux.h" ++#include ++ ++char _license[] SEC("license") = "GPL"; ++ ++struct callback_ctx { ++ int dummy; ++}; ++ ++static long write_vma(struct task_struct *task, struct vm_area_struct *vma, ++ struct callback_ctx *data) ++{ ++ /* writing to vma, which is illegal */ ++ vma->vm_flags |= 0x55; ++ ++ return 0; ++} ++ ++SEC("raw_tp/sys_enter") ++int handle_getpid(void) ++{ ++ struct task_struct *task = bpf_get_current_task_btf(); ++ struct callback_ctx data = {}; ++ ++ bpf_find_vma(task, 0, write_vma, &data, 0); ++ return 0; ++} +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/find_vma_fail2.c +@@ -0,0 +1,29 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Facebook */ ++#include "vmlinux.h" ++#include ++ ++char _license[] SEC("license") = "GPL"; ++ ++struct callback_ctx { ++ int dummy; ++}; ++ ++static long write_task(struct task_struct *task, struct vm_area_struct *vma, ++ struct callback_ctx *data) ++{ ++ /* writing to task, which is illegal */ ++ task->mm = NULL; ++ ++ return 0; ++} ++ ++SEC("raw_tp/sys_enter") ++int handle_getpid(void) ++{ ++ struct task_struct *task = bpf_get_current_task_btf(); ++ struct callback_ctx data = {}; ++ ++ bpf_find_vma(task, 0, write_task, &data, 0); ++ return 0; ++} diff --git a/patches.suse/selftests-bpf-Add-tests-for-get_func_-arg-ret-arg_cn.patch b/patches.suse/selftests-bpf-Add-tests-for-get_func_-arg-ret-arg_cn.patch new file mode 100644 index 0000000..385d1a3 --- /dev/null +++ b/patches.suse/selftests-bpf-Add-tests-for-get_func_-arg-ret-arg_cn.patch @@ -0,0 +1,194 @@ +From: Jiri Olsa +Date: Wed, 8 Dec 2021 20:32:45 +0100 +Subject: selftests/bpf: Add tests for get_func_[arg|ret|arg_cnt] helpers +Patch-mainline: v5.17-rc1 +Git-commit: 006004b715569f742535f70f3f06b41d8135486c +References: jsc#PED-1368 + +Adding tests for get_func_[arg|ret|arg_cnt] helpers. +Using these helpers in fentry/fexit/fmod_ret programs. + +Signed-off-by: Jiri Olsa +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211208193245.172141-6-jolsa@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/get_func_args_test.c | 44 ++++ + tools/testing/selftests/bpf/progs/get_func_args_test.c | 123 ++++++++++++ + 2 files changed, 167 insertions(+) + create mode 100644 tools/testing/selftests/bpf/prog_tests/get_func_args_test.c + create mode 100644 tools/testing/selftests/bpf/progs/get_func_args_test.c + +--- /dev/null ++++ b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c +@@ -0,0 +1,44 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#include ++#include "get_func_args_test.skel.h" ++ ++void test_get_func_args_test(void) ++{ ++ struct get_func_args_test *skel = NULL; ++ __u32 duration = 0, retval; ++ int err, prog_fd; ++ ++ skel = get_func_args_test__open_and_load(); ++ if (!ASSERT_OK_PTR(skel, "get_func_args_test__open_and_load")) ++ return; ++ ++ err = get_func_args_test__attach(skel); ++ if (!ASSERT_OK(err, "get_func_args_test__attach")) ++ goto cleanup; ++ ++ /* This runs bpf_fentry_test* functions and triggers ++ * fentry/fexit programs. 
++ */ ++ prog_fd = bpf_program__fd(skel->progs.test1); ++ err = bpf_prog_test_run(prog_fd, 1, NULL, 0, ++ NULL, NULL, &retval, &duration); ++ ASSERT_OK(err, "test_run"); ++ ASSERT_EQ(retval, 0, "test_run"); ++ ++ /* This runs bpf_modify_return_test function and triggers ++ * fmod_ret_test and fexit_test programs. ++ */ ++ prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); ++ err = bpf_prog_test_run(prog_fd, 1, NULL, 0, ++ NULL, NULL, &retval, &duration); ++ ASSERT_OK(err, "test_run"); ++ ASSERT_EQ(retval, 1234, "test_run"); ++ ++ ASSERT_EQ(skel->bss->test1_result, 1, "test1_result"); ++ ASSERT_EQ(skel->bss->test2_result, 1, "test2_result"); ++ ASSERT_EQ(skel->bss->test3_result, 1, "test3_result"); ++ ASSERT_EQ(skel->bss->test4_result, 1, "test4_result"); ++ ++cleanup: ++ get_func_args_test__destroy(skel); ++} +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/get_func_args_test.c +@@ -0,0 +1,123 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#include ++#include ++#include ++#include ++ ++char _license[] SEC("license") = "GPL"; ++ ++__u64 test1_result = 0; ++SEC("fentry/bpf_fentry_test1") ++int BPF_PROG(test1) ++{ ++ __u64 cnt = bpf_get_func_arg_cnt(ctx); ++ __u64 a = 0, z = 0, ret = 0; ++ __s64 err; ++ ++ test1_result = cnt == 1; ++ ++ /* valid arguments */ ++ err = bpf_get_func_arg(ctx, 0, &a); ++ ++ /* We need to cast access to traced function argument values with ++ * proper type cast, because trampoline uses type specific instruction ++ * to save it, like for 'int a' with 32-bit mov like: ++ * ++ * mov %edi,-0x8(%rbp) ++ * ++ * so the upper 4 bytes are not zeroed. ++ */ ++ test1_result &= err == 0 && ((int) a == 1); ++ ++ /* not valid argument */ ++ err = bpf_get_func_arg(ctx, 1, &z); ++ test1_result &= err == -EINVAL; ++ ++ /* return value fails in fentry */ ++ err = bpf_get_func_ret(ctx, &ret); ++ test1_result &= err == -EOPNOTSUPP; ++ return 0; ++} ++ ++__u64 test2_result = 0; ++SEC("fexit/bpf_fentry_test2") ++int BPF_PROG(test2) ++{ ++ __u64 cnt = bpf_get_func_arg_cnt(ctx); ++ __u64 a = 0, b = 0, z = 0, ret = 0; ++ __s64 err; ++ ++ test2_result = cnt == 2; ++ ++ /* valid arguments */ ++ err = bpf_get_func_arg(ctx, 0, &a); ++ test2_result &= err == 0 && (int) a == 2; ++ ++ err = bpf_get_func_arg(ctx, 1, &b); ++ test2_result &= err == 0 && b == 3; ++ ++ /* not valid argument */ ++ err = bpf_get_func_arg(ctx, 2, &z); ++ test2_result &= err == -EINVAL; ++ ++ /* return value */ ++ err = bpf_get_func_ret(ctx, &ret); ++ test2_result &= err == 0 && ret == 5; ++ return 0; ++} ++ ++__u64 test3_result = 0; ++SEC("fmod_ret/bpf_modify_return_test") ++int BPF_PROG(fmod_ret_test, int _a, int *_b, int _ret) ++{ ++ __u64 cnt = bpf_get_func_arg_cnt(ctx); ++ __u64 a = 0, b = 0, z = 0, ret = 0; ++ __s64 err; ++ ++ test3_result = cnt == 2; ++ ++ /* valid arguments */ ++ err = bpf_get_func_arg(ctx, 0, &a); ++ test3_result &= err == 0 && ((int) a == 1); ++ ++ err = bpf_get_func_arg(ctx, 1, &b); ++ test3_result &= err == 0 && ((int *) b == _b); ++ ++ /* not valid argument */ ++ err = bpf_get_func_arg(ctx, 2, &z); ++ test3_result &= err == -EINVAL; ++ ++ /* return value */ ++ err = bpf_get_func_ret(ctx, &ret); ++ test3_result &= err == 0 && ret == 0; ++ ++ /* change return value, it's checked in fexit_test program */ ++ return 1234; ++} ++ ++__u64 test4_result = 0; ++SEC("fexit/bpf_modify_return_test") ++int BPF_PROG(fexit_test, int _a, int *_b, int _ret) ++{ ++ __u64 cnt = bpf_get_func_arg_cnt(ctx); ++ __u64 a = 0, b = 0, z = 0, ret = 0; ++ __s64 err; ++ ++ test4_result = cnt == 2; ++ ++ /* valid 
arguments */ ++ err = bpf_get_func_arg(ctx, 0, &a); ++ test4_result &= err == 0 && ((int) a == 1); ++ ++ err = bpf_get_func_arg(ctx, 1, &b); ++ test4_result &= err == 0 && ((int *) b == _b); ++ ++ /* not valid argument */ ++ err = bpf_get_func_arg(ctx, 2, &z); ++ test4_result &= err == -EINVAL; ++ ++ /* return value */ ++ err = bpf_get_func_ret(ctx, &ret); ++ test4_result &= err == 0 && ret == 1234; ++ return 0; ++} diff --git a/patches.suse/selftests-bpf-Add-uprobe-triggering-overhead-benchma.patch b/patches.suse/selftests-bpf-Add-uprobe-triggering-overhead-benchma.patch new file mode 100644 index 0000000..ad0d720 --- /dev/null +++ b/patches.suse/selftests-bpf-Add-uprobe-triggering-overhead-benchma.patch @@ -0,0 +1,282 @@ +From: Andrii Nakryiko +Date: Mon, 15 Nov 2021 17:30:41 -0800 +Subject: selftests/bpf: Add uprobe triggering overhead benchmarks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 +Git-commit: d41bc48bfab2076f7db88d079a3a3203dd9c4a54 +References: jsc#PED-1368 + +Add benchmark to measure overhead of uprobes and uretprobes. Also have +a baseline (no uprobe attached) benchmark. + +On my dev machine, baseline benchmark can trigger 130M user_target() +invocations. When uprobe is attached, this falls to just 700K. With +uretprobe, we get down to 520K: + + $ sudo ./bench trig-uprobe-base -a + Summary: hits 131.289 ± 2.872M/s + + # UPROBE + $ sudo ./bench -a trig-uprobe-without-nop + Summary: hits 0.729 ± 0.007M/s + + $ sudo ./bench -a trig-uprobe-with-nop + Summary: hits 1.798 ± 0.017M/s + + # URETPROBE + $ sudo ./bench -a trig-uretprobe-without-nop + Summary: hits 0.508 ± 0.012M/s + + $ sudo ./bench -a trig-uretprobe-with-nop + Summary: hits 0.883 ± 0.008M/s + +So there is almost 2.5x performance difference between probing nop vs +non-nop instruction for entry uprobe. And 1.7x difference for uretprobe. + +This means that non-nop uprobe overhead is around 1.4 microseconds for uprobe +and 2 microseconds for non-nop uretprobe. + +For nop variants, uprobe and uretprobe overhead is down to 0.556 and +1.13 microseconds, respectively. + +For comparison, just doing a very low-overhead syscall (with no BPF +programs attached anywhere) gives: + + $ sudo ./bench trig-base -a + Summary: hits 4.830 ± 0.036M/s + +So uprobes are about 2.67x slower than pure context switch. 
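+
+For the record, the microsecond figures above are just the inverse of
+the reported rates (1 / (M hits/s) = us/hit) with the baseline cost
+subtracted, e.g.:
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          double base = 131.289, uprobe = 0.729;  /* hits, M/s */
+
+          /* ~1.36 us of pure uprobe overhead per hit */
+          printf("~%.2f us/hit\n", 1.0 / uprobe - 1.0 / base);
+          return 0;
+  }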
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211116013041.4072571-1-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/Makefile | 4 + tools/testing/selftests/bpf/bench.c | 10 + + tools/testing/selftests/bpf/benchs/bench_trigger.c | 146 +++++++++++++++++++++ + tools/testing/selftests/bpf/progs/trigger_bench.c | 7 + + 4 files changed, 166 insertions(+), 1 deletion(-) + +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -533,7 +533,9 @@ $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ri + $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h + $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) + $(OUTPUT)/bench: LDLIBS += -lm +-$(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ ++$(OUTPUT)/bench: $(OUTPUT)/bench.o \ ++ $(OUTPUT)/testing_helpers.o \ ++ $(OUTPUT)/trace_helpers.o \ + $(OUTPUT)/bench_count.o \ + $(OUTPUT)/bench_rename.o \ + $(OUTPUT)/bench_trigger.o \ +--- a/tools/testing/selftests/bpf/bench.c ++++ b/tools/testing/selftests/bpf/bench.c +@@ -359,6 +359,11 @@ extern const struct bench bench_trig_kpr + extern const struct bench bench_trig_fentry; + extern const struct bench bench_trig_fentry_sleep; + extern const struct bench bench_trig_fmodret; ++extern const struct bench bench_trig_uprobe_base; ++extern const struct bench bench_trig_uprobe_with_nop; ++extern const struct bench bench_trig_uretprobe_with_nop; ++extern const struct bench bench_trig_uprobe_without_nop; ++extern const struct bench bench_trig_uretprobe_without_nop; + extern const struct bench bench_rb_libbpf; + extern const struct bench bench_rb_custom; + extern const struct bench bench_pb_libbpf; +@@ -385,6 +390,11 @@ static const struct bench *benchs[] = { + &bench_trig_fentry, + &bench_trig_fentry_sleep, + &bench_trig_fmodret, ++ &bench_trig_uprobe_base, ++ &bench_trig_uprobe_with_nop, ++ &bench_trig_uretprobe_with_nop, ++ &bench_trig_uprobe_without_nop, ++ &bench_trig_uretprobe_without_nop, + &bench_rb_libbpf, + &bench_rb_custom, + &bench_pb_libbpf, +--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c ++++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c +@@ -2,6 +2,7 @@ + /* Copyright (c) 2020 Facebook */ + #include "bench.h" + #include "trigger_bench.skel.h" ++#include "trace_helpers.h" + + /* BPF triggering benchmarks */ + static struct trigger_ctx { +@@ -107,6 +108,101 @@ static void *trigger_consumer(void *inpu + return NULL; + } + ++/* make sure call is not inlined and not avoided by compiler, so __weak and ++ * inline asm volatile in the body of the function ++ * ++ * There is a performance difference between uprobing at nop location vs other ++ * instructions. So use two different targets, one of which starts with nop ++ * and another doesn't. ++ * ++ * GCC doesn't generate stack setup preample for these functions due to them ++ * having no input arguments and doing nothing in the body. 
++ */ ++__weak void uprobe_target_with_nop(void) ++{ ++ asm volatile ("nop"); ++} ++ ++__weak void uprobe_target_without_nop(void) ++{ ++ asm volatile (""); ++} ++ ++static void *uprobe_base_producer(void *input) ++{ ++ while (true) { ++ uprobe_target_with_nop(); ++ atomic_inc(&base_hits.value); ++ } ++ return NULL; ++} ++ ++static void *uprobe_producer_with_nop(void *input) ++{ ++ while (true) ++ uprobe_target_with_nop(); ++ return NULL; ++} ++ ++static void *uprobe_producer_without_nop(void *input) ++{ ++ while (true) ++ uprobe_target_without_nop(); ++ return NULL; ++} ++ ++static void usetup(bool use_retprobe, bool use_nop) ++{ ++ size_t uprobe_offset; ++ ssize_t base_addr; ++ struct bpf_link *link; ++ ++ setup_libbpf(); ++ ++ ctx.skel = trigger_bench__open_and_load(); ++ if (!ctx.skel) { ++ fprintf(stderr, "failed to open skeleton\n"); ++ exit(1); ++ } ++ ++ base_addr = get_base_addr(); ++ if (use_nop) ++ uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop, base_addr); ++ else ++ uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop, base_addr); ++ ++ link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe, ++ use_retprobe, ++ -1 /* all PIDs */, ++ "/proc/self/exe", ++ uprobe_offset); ++ if (!link) { ++ fprintf(stderr, "failed to attach uprobe!\n"); ++ exit(1); ++ } ++ ctx.skel->links.bench_trigger_uprobe = link; ++} ++ ++static void uprobe_setup_with_nop() ++{ ++ usetup(false, true); ++} ++ ++static void uretprobe_setup_with_nop() ++{ ++ usetup(true, true); ++} ++ ++static void uprobe_setup_without_nop() ++{ ++ usetup(false, false); ++} ++ ++static void uretprobe_setup_without_nop() ++{ ++ usetup(true, false); ++} ++ + const struct bench bench_trig_base = { + .name = "trig-base", + .validate = trigger_validate, +@@ -180,5 +276,55 @@ const struct bench bench_trig_fmodret = + .consumer_thread = trigger_consumer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, ++ .report_final = hits_drops_report_final, ++}; ++ ++const struct bench bench_trig_uprobe_base = { ++ .name = "trig-uprobe-base", ++ .setup = NULL, /* no uprobe/uretprobe is attached */ ++ .producer_thread = uprobe_base_producer, ++ .consumer_thread = trigger_consumer, ++ .measure = trigger_base_measure, ++ .report_progress = hits_drops_report_progress, ++ .report_final = hits_drops_report_final, ++}; ++ ++const struct bench bench_trig_uprobe_with_nop = { ++ .name = "trig-uprobe-with-nop", ++ .setup = uprobe_setup_with_nop, ++ .producer_thread = uprobe_producer_with_nop, ++ .consumer_thread = trigger_consumer, ++ .measure = trigger_measure, ++ .report_progress = hits_drops_report_progress, ++ .report_final = hits_drops_report_final, ++}; ++ ++const struct bench bench_trig_uretprobe_with_nop = { ++ .name = "trig-uretprobe-with-nop", ++ .setup = uretprobe_setup_with_nop, ++ .producer_thread = uprobe_producer_with_nop, ++ .consumer_thread = trigger_consumer, ++ .measure = trigger_measure, ++ .report_progress = hits_drops_report_progress, ++ .report_final = hits_drops_report_final, ++}; ++ ++const struct bench bench_trig_uprobe_without_nop = { ++ .name = "trig-uprobe-without-nop", ++ .setup = uprobe_setup_without_nop, ++ .producer_thread = uprobe_producer_without_nop, ++ .consumer_thread = trigger_consumer, ++ .measure = trigger_measure, ++ .report_progress = hits_drops_report_progress, ++ .report_final = hits_drops_report_final, ++}; ++ ++const struct bench bench_trig_uretprobe_without_nop = { ++ .name = "trig-uretprobe-without-nop", ++ .setup = uretprobe_setup_without_nop, 
++ .producer_thread = uprobe_producer_without_nop, ++ .consumer_thread = trigger_consumer, ++ .measure = trigger_measure, ++ .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, + }; +--- a/tools/testing/selftests/bpf/progs/trigger_bench.c ++++ b/tools/testing/selftests/bpf/progs/trigger_bench.c +@@ -52,3 +52,10 @@ int bench_trigger_fmodret(void *ctx) + __sync_add_and_fetch(&hits, 1); + return -22; + } ++ ++SEC("uprobe/self/uprobe_target") ++int bench_trigger_uprobe(void *ctx) ++{ ++ __sync_add_and_fetch(&hits, 1); ++ return 0; ++} diff --git a/patches.suse/selftests-bpf-Additional-test-for-CO-RE-in-the-kerne.patch b/patches.suse/selftests-bpf-Additional-test-for-CO-RE-in-the-kerne.patch new file mode 100644 index 0000000..cd83ae7 --- /dev/null +++ b/patches.suse/selftests-bpf-Additional-test-for-CO-RE-in-the-kerne.patch @@ -0,0 +1,162 @@ +From: Alexei Starovoitov +Date: Wed, 1 Dec 2021 10:10:38 -0800 +Subject: selftests/bpf: Additional test for CO-RE in the kernel. +Patch-mainline: v5.17-rc1 +Git-commit: 26b367e3663931f2fee5f0786a1eff712e67b0bf +References: jsc#PED-1368 + +Add a test where randmap() function is appended to three different bpf +programs. That action checks struct bpf_core_relo replication logic +and offset adjustment in gen loader part of libbpf. + +Fourth bpf program has 360 CO-RE relocations from vmlinux, bpf_testmod, +and non-existing type. It tests candidate cache logic. + +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-16-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/Makefile | 2 + tools/testing/selftests/bpf/prog_tests/core_kern.c | 14 ++ + tools/testing/selftests/bpf/progs/core_kern.c | 104 +++++++++++++++++++++ + 3 files changed, 119 insertions(+), 1 deletion(-) + create mode 100644 tools/testing/selftests/bpf/prog_tests/core_kern.c + create mode 100644 tools/testing/selftests/bpf/progs/core_kern.c + +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -326,7 +326,7 @@ LINKED_SKELS := test_static_linked.skel. 
+
+ LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
+	test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \
+-	map_ptr_kern.c
++	map_ptr_kern.c core_kern.c
+ # Generate both light skeleton and libbpf skeleton for these
+ LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test_subprog.c
+ SKEL_BLACKLIST += $$(LSKELS)
+--- /dev/null
++++ b/tools/testing/selftests/bpf/prog_tests/core_kern.c
+@@ -0,0 +1,14 @@
++// SPDX-License-Identifier: GPL-2.0
++/* Copyright (c) 2021 Facebook */
++
++#include "test_progs.h"
++#include "core_kern.lskel.h"
++
++void test_core_kern_lskel(void)
++{
++	struct core_kern_lskel *skel;
++
++	skel = core_kern_lskel__open_and_load();
++	ASSERT_OK_PTR(skel, "open_and_load");
++	core_kern_lskel__destroy(skel);
++}
+--- /dev/null
++++ b/tools/testing/selftests/bpf/progs/core_kern.c
+@@ -0,0 +1,104 @@
++// SPDX-License-Identifier: GPL-2.0
++/* Copyright (c) 2021 Facebook */
++#include "vmlinux.h"
++
++#include <bpf/bpf_helpers.h>
++#include <bpf/bpf_tracing.h>
++#include <bpf/bpf_core_read.h>
++
++#define ATTR __always_inline
++#include "test_jhash.h"
++
++struct {
++	__uint(type, BPF_MAP_TYPE_ARRAY);
++	__type(key, u32);
++	__type(value, u32);
++	__uint(max_entries, 256);
++} array1 SEC(".maps");
++
++struct {
++	__uint(type, BPF_MAP_TYPE_ARRAY);
++	__type(key, u32);
++	__type(value, u32);
++	__uint(max_entries, 256);
++} array2 SEC(".maps");
++
++static __noinline int randmap(int v, const struct net_device *dev)
++{
++	struct bpf_map *map = (struct bpf_map *)&array1;
++	int key = bpf_get_prandom_u32() & 0xff;
++	int *val;
++
++	if (bpf_get_prandom_u32() & 1)
++		map = (struct bpf_map *)&array2;
++
++	val = bpf_map_lookup_elem(map, &key);
++	if (val)
++		*val = bpf_get_prandom_u32() + v + dev->mtu;
++
++	return 0;
++}
++
++SEC("tp_btf/xdp_devmap_xmit")
++int BPF_PROG(tp_xdp_devmap_xmit_multi, const struct net_device
++	     *from_dev, const struct net_device *to_dev, int sent, int drops,
++	     int err)
++{
++	return randmap(from_dev->ifindex, from_dev);
++}
++
++SEC("fentry/eth_type_trans")
++int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb,
++	     struct net_device *dev, unsigned short protocol)
++{
++	return randmap(dev->ifindex + skb->len, dev);
++}
++
++SEC("fexit/eth_type_trans")
++int BPF_PROG(fexit_eth_type_trans, struct sk_buff *skb,
++	     struct net_device *dev, unsigned short protocol)
++{
++	return randmap(dev->ifindex + skb->len, dev);
++}
++
++volatile const int never;
++
++struct __sk_bUfF /* it will not exist in vmlinux */ {
++	int len;
++} __attribute__((preserve_access_index));
++
++struct bpf_testmod_test_read_ctx /* it exists in bpf_testmod */ {
++	size_t len;
++} __attribute__((preserve_access_index));
++
++SEC("tc")
++int balancer_ingress(struct __sk_buff *ctx)
++{
++	void *data_end = (void *)(long)ctx->data_end;
++	void *data = (void *)(long)ctx->data;
++	void *ptr;
++	int ret = 0, nh_off, i = 0;
++
++	nh_off = 14;
++
++	/* pragma unroll doesn't work on large loops */
++#define C do { \
++	ptr = data + i; \
++	if (ptr + nh_off > data_end) \
++		break; \
++	ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
++	if (never) { \
++		/* below is a dead code with unresolvable CO-RE relo */ \
++		i += ((struct __sk_bUfF *)ctx)->len; \
++		/* this CO-RE relo may or may not resolve
++		 * depending on whether bpf_testmod is loaded.
++ */ \ ++ i += ((struct bpf_testmod_test_read_ctx *)ctx)->len; \ ++ } \ ++ } while (0); ++#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C; ++ C30;C30;C30; /* 90 calls */ ++ return 0; ++} ++ ++char LICENSE[] SEC("license") = "GPL"; diff --git a/patches.suse/selftests-bpf-Avoid-duplicate-btf__parse-call.patch b/patches.suse/selftests-bpf-Avoid-duplicate-btf__parse-call.patch new file mode 100644 index 0000000..1dbe998 --- /dev/null +++ b/patches.suse/selftests-bpf-Avoid-duplicate-btf__parse-call.patch @@ -0,0 +1,30 @@ +From: Andrii Nakryiko +Date: Sun, 7 Nov 2021 08:55:19 -0800 +Subject: selftests/bpf: Avoid duplicate btf__parse() call +Patch-mainline: v5.17-rc1 +Git-commit: f92321d706a810b89a905e04658e38931c4bb0e0 +References: jsc#PED-1368 + +btf__parse() is repeated after successful setup, leaving the first +instance leaked. Remove redundant and premature call. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Reviewed-by: Hengqi Chen +Link: https://lore.kernel.org/bpf/20211107165521.9240-8-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/core_reloc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c ++++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c +@@ -433,7 +433,7 @@ static int setup_type_id_case_local(stru + + static int setup_type_id_case_success(struct core_reloc_test_case *test) { + struct core_reloc_type_id_output *exp = (void *)test->output; +- struct btf *targ_btf = btf__parse(test->btf_src_file, NULL); ++ struct btf *targ_btf; + int err; + + err = setup_type_id_case_local(test); diff --git a/patches.suse/selftests-bpf-Build-testing_helpers.o-out-of-tree.patch b/patches.suse/selftests-bpf-Build-testing_helpers.o-out-of-tree.patch new file mode 100644 index 0000000..b10e700 --- /dev/null +++ b/patches.suse/selftests-bpf-Build-testing_helpers.o-out-of-tree.patch @@ -0,0 +1,81 @@ +From: Jean-Philippe Brucker +Date: Wed, 1 Dec 2021 14:51:02 +0000 +Subject: selftests/bpf: Build testing_helpers.o out of tree +Patch-mainline: v5.17-rc1 +Git-commit: eee9a6df0eed6481d5448a55b218a45868b41b5b +References: jsc#PED-1368 + +Add $(OUTPUT) prefix to testing_helpers.o, so it can be built out of +tree when necessary. At the moment, in addition to being built in-tree +even when out-of-tree is required, testing_helpers.o is not built with +the right recipe when cross-building. + +For consistency the other helpers, cgroup_helpers and trace_helpers, can +also be passed as objects instead of source. Use *_HELPERS variable to +keep the Makefile readable. 
+ +Fixes: f87c1930ac29 ("selftests/bpf: Merge test_stub.c into testing_helpers.c") +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201145101.823159-1-jean-philippe@linaro.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/Makefile | 40 +++++++++++++++++++---------------- + 1 file changed, 22 insertions(+), 18 deletions(-) + +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -192,22 +192,26 @@ TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPF + + $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ) + +-$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c testing_helpers.o +-$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c testing_helpers.o +-$(OUTPUT)/test_sock: cgroup_helpers.c testing_helpers.o +-$(OUTPUT)/test_sock_addr: cgroup_helpers.c testing_helpers.o +-$(OUTPUT)/test_sockmap: cgroup_helpers.c testing_helpers.o +-$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c testing_helpers.o +-$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c testing_helpers.o +-$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c testing_helpers.o +-$(OUTPUT)/test_sock_fields: cgroup_helpers.c testing_helpers.o +-$(OUTPUT)/test_sysctl: cgroup_helpers.c testing_helpers.o +-$(OUTPUT)/test_tag: testing_helpers.o +-$(OUTPUT)/test_lirc_mode2_user: testing_helpers.o +-$(OUTPUT)/xdping: testing_helpers.o +-$(OUTPUT)/flow_dissector_load: testing_helpers.o +-$(OUTPUT)/test_maps: testing_helpers.o +-$(OUTPUT)/test_verifier: testing_helpers.o ++CGROUP_HELPERS := $(OUTPUT)/cgroup_helpers.o ++TESTING_HELPERS := $(OUTPUT)/testing_helpers.o ++TRACE_HELPERS := $(OUTPUT)/trace_helpers.o ++ ++$(OUTPUT)/test_dev_cgroup: $(CGROUP_HELPERS) $(TESTING_HELPERS) ++$(OUTPUT)/test_skb_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) ++$(OUTPUT)/test_sock: $(CGROUP_HELPERS) $(TESTING_HELPERS) ++$(OUTPUT)/test_sock_addr: $(CGROUP_HELPERS) $(TESTING_HELPERS) ++$(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS) ++$(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS) ++$(OUTPUT)/get_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) ++$(OUTPUT)/test_cgroup_storage: $(CGROUP_HELPERS) $(TESTING_HELPERS) ++$(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS) ++$(OUTPUT)/test_sysctl: $(CGROUP_HELPERS) $(TESTING_HELPERS) ++$(OUTPUT)/test_tag: $(TESTING_HELPERS) ++$(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS) ++$(OUTPUT)/xdping: $(TESTING_HELPERS) ++$(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS) ++$(OUTPUT)/test_maps: $(TESTING_HELPERS) ++$(OUTPUT)/test_verifier: $(TESTING_HELPERS) + + BPFTOOL ?= $(DEFAULT_BPFTOOL) + $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ +@@ -536,8 +540,8 @@ $(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bp + $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) + $(OUTPUT)/bench: LDLIBS += -lm + $(OUTPUT)/bench: $(OUTPUT)/bench.o \ +- $(OUTPUT)/testing_helpers.o \ +- $(OUTPUT)/trace_helpers.o \ ++ $(TESTING_HELPERS) \ ++ $(TRACE_HELPERS) \ + $(OUTPUT)/bench_count.o \ + $(OUTPUT)/bench_rename.o \ + $(OUTPUT)/bench_trigger.o \ diff --git a/patches.suse/selftests-bpf-Check-bpf_msg_push_data-return-value.patch b/patches.suse/selftests-bpf-Check-bpf_msg_push_data-return-value.patch new file mode 100644 index 0000000..aaebca8 --- /dev/null +++ b/patches.suse/selftests-bpf-Check-bpf_msg_push_data-return-value.patch @@ -0,0 +1,95 @@ +From: Felix Maurer +Date: Fri, 11 Feb 2022 18:43:36 +0100 +Subject: selftests: bpf: Check 
bpf_msg_push_data return value +Patch-mainline: v5.17-rc6 +Git-commit: 61d06f01f9710b327a53492e5add9f972eb909b3 +References: jsc#PED-1368 + +bpf_msg_push_data may return a non-zero value to indicate an error. The +return value should be checked to prevent undetected errors. + +To indicate an error, the BPF programs now perform a different action +than their intended one to make the userspace test program notice the +error, i.e., the programs supposed to pass/redirect drop, the program +supposed to drop passes. + +Fixes: 84fbfe026acaa ("bpf: test_sockmap add options to use msg_push_data") +Signed-off-by: Felix Maurer +Signed-off-by: Alexei Starovoitov +Acked-by: John Fastabend +Link: https://lore.kernel.org/bpf/89f767bb44005d6b4dd1f42038c438f76b3ebfad.1644601294.git.fmaurer@redhat.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/progs/test_sockmap_kern.h | 26 ++++++++++++------ + 1 file changed, 18 insertions(+), 8 deletions(-) + +--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h ++++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +@@ -235,7 +235,7 @@ SEC("sk_msg1") + int bpf_prog4(struct sk_msg_md *msg) + { + int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; +- int *start, *end, *start_push, *end_push, *start_pop, *pop; ++ int *start, *end, *start_push, *end_push, *start_pop, *pop, err = 0; + + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); + if (bytes) +@@ -249,8 +249,11 @@ int bpf_prog4(struct sk_msg_md *msg) + bpf_msg_pull_data(msg, *start, *end, 0); + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); +- if (start_push && end_push) +- bpf_msg_push_data(msg, *start_push, *end_push, 0); ++ if (start_push && end_push) { ++ err = bpf_msg_push_data(msg, *start_push, *end_push, 0); ++ if (err) ++ return SK_DROP; ++ } + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) +@@ -263,6 +266,7 @@ int bpf_prog6(struct sk_msg_md *msg) + { + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f; ++ int err = 0; + __u64 flags = 0; + + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); +@@ -279,8 +283,11 @@ int bpf_prog6(struct sk_msg_md *msg) + + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); +- if (start_push && end_push) +- bpf_msg_push_data(msg, *start_push, *end_push, 0); ++ if (start_push && end_push) { ++ err = bpf_msg_push_data(msg, *start_push, *end_push, 0); ++ if (err) ++ return SK_DROP; ++ } + + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); +@@ -338,7 +345,7 @@ SEC("sk_msg5") + int bpf_prog10(struct sk_msg_md *msg) + { + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop; +- int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; ++ int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, err = 0; + + bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); + if (bytes) +@@ -352,8 +359,11 @@ int bpf_prog10(struct sk_msg_md *msg) + bpf_msg_pull_data(msg, *start, *end, 0); + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); +- if (start_push && end_push) +- bpf_msg_push_data(msg, *start_push, *end_push, 0); ++ if (start_push && end_push) { ++ err = bpf_msg_push_data(msg, *start_push, *end_push, 0); ++ if (err) 
++ return SK_PASS; ++ } + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) diff --git a/patches.suse/selftests-bpf-Clarify-llvm-dependency-with-btf_tag-s.patch b/patches.suse/selftests-bpf-Clarify-llvm-dependency-with-btf_tag-s.patch new file mode 100644 index 0000000..0e6e583 --- /dev/null +++ b/patches.suse/selftests-bpf-Clarify-llvm-dependency-with-btf_tag-s.patch @@ -0,0 +1,43 @@ +From: Yonghong Song +Date: Thu, 11 Nov 2021 17:26:51 -0800 +Subject: selftests/bpf: Clarify llvm dependency with btf_tag selftest +Patch-mainline: v5.17-rc1 +Git-commit: 3f1d0dc0ba290aab357083a0abfe267c8cffdc8d +References: jsc#PED-1368 + +btf_tag selftest needs certain llvm versions (>= llvm14). +Make it clear in the selftests README.rst file. + +Signed-off-by: Yonghong Song +Signed-off-by: Alexei Starovoitov +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211112012651.1508549-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/README.rst | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/tools/testing/selftests/bpf/README.rst ++++ b/tools/testing/selftests/bpf/README.rst +@@ -204,16 +204,17 @@ __ https://reviews.llvm.org/D93563 + btf_tag test and Clang version + ============================== + +-The btf_tag selftest require LLVM support to recognize the btf_decl_tag attribute. +-It was introduced in `Clang 14`__. ++The btf_tag selftest requires LLVM support to recognize the btf_decl_tag and ++btf_type_tag attributes. They are introduced in `Clang 14` [0_, 1_]. + +-Without it, the btf_tag selftest will be skipped and you will observe: ++Without them, the btf_tag selftest will be skipped and you will observe: + + .. code-block:: console + + # btf_tag:SKIP + +-__ https://reviews.llvm.org/D111588 ++.. _0: https://reviews.llvm.org/D111588 ++.. _1: https://reviews.llvm.org/D111199 + + Clang dependencies for static linking tests + =========================================== diff --git a/patches.suse/selftests-bpf-Clean-up-btf-and-btf_dump-in-dump_data.patch b/patches.suse/selftests-bpf-Clean-up-btf-and-btf_dump-in-dump_data.patch new file mode 100644 index 0000000..d6c1188 --- /dev/null +++ b/patches.suse/selftests-bpf-Clean-up-btf-and-btf_dump-in-dump_data.patch @@ -0,0 +1,50 @@ +From: Andrii Nakryiko +Date: Sun, 7 Nov 2021 08:55:18 -0800 +Subject: selftests/bpf: Clean up btf and btf_dump in dump_datasec test +Patch-mainline: v5.17-rc1 +Git-commit: f79587520a6007a3734b23a3c2eb4c62aa457533 +References: jsc#PED-1368 + +Free up used resources at the end and on error. Also make it more +obvious that there is btf__parse() call that creates struct btf +instance. 
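+
+As a rough sketch, the resulting open/cleanup idiom looks as follows
+(condensed; the object name is hypothetical, and opts/btf_dump_snprintf
+are the ones already used by the test). It leans on btf__free() and
+btf_dump__free() accepting NULL, so a single exit label suffices:
+
+  struct btf *btf;
+  struct btf_dump *d = NULL;
+
+  btf = btf__parse("some_prog.bpf.o", NULL);  /* creates the struct btf */
+  if (!ASSERT_OK_PTR(btf, "btf__parse"))
+          return;
+  d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+  if (!ASSERT_OK_PTR(d, "btf_dump__new"))
+          goto out;
+  /* ... exercise the dumper ... */
+out:
+  btf_dump__free(d);  /* both free functions tolerate NULL */
+  btf__free(btf);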
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Reviewed-by: Hengqi Chen +Link: https://lore.kernel.org/bpf/20211107165521.9240-7-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/btf_dump.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c +@@ -799,21 +799,25 @@ static void test_btf_datasec(struct btf + + static void test_btf_dump_datasec_data(char *str) + { +- struct btf *btf = btf__parse("xdping_kern.o", NULL); ++ struct btf *btf; + struct btf_dump_opts opts = { .ctx = str }; + char license[4] = "GPL"; + struct btf_dump *d; + ++ btf = btf__parse("xdping_kern.o", NULL); + if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found")) + return; + + d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf); + if (!ASSERT_OK_PTR(d, "could not create BTF dump")) +- return; ++ goto out; + + test_btf_datasec(btf, d, str, "license", + "SEC(\"license\") char[4] _license = (char[4])['G','P','L',];", + license, sizeof(license)); ++out: ++ btf_dump__free(d); ++ btf__free(btf); + } + + void test_btf_dump() { diff --git a/patches.suse/selftests-bpf-Configure-dir-paths-via-env-in-test_bp.patch b/patches.suse/selftests-bpf-Configure-dir-paths-via-env-in-test_bp.patch new file mode 100644 index 0000000..be78eba --- /dev/null +++ b/patches.suse/selftests-bpf-Configure-dir-paths-via-env-in-test_bp.patch @@ -0,0 +1,115 @@ +From: Quentin Monnet +Date: Mon, 15 Nov 2021 22:58:44 +0000 +Subject: selftests/bpf: Configure dir paths via env in + test_bpftool_synctypes.py +Patch-mainline: v5.17-rc1 +Git-commit: e12cd158c8a45b3926cc2f42151384a2d7fdcec3 +References: jsc#PED-1368 + +Script test_bpftool_synctypes.py parses a number of files in the bpftool +directory (or even elsewhere in the repo) to make sure that the list of +types or options in those different files are consistent. Instead of +having fixed paths, let's make the directories configurable through +environment variable. This should make easier in the future to run the +script in a different setup, for example on an out-of-tree bpftool +mirror with a different layout. 
+ +Signed-off-by: Quentin Monnet +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211115225844.33943-4-quentin@isovalent.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/test_bpftool_synctypes.py | 26 +++++++++++------- + 1 file changed, 17 insertions(+), 9 deletions(-) + +--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py ++++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py +@@ -9,7 +9,15 @@ import os, sys + + LINUX_ROOT = os.path.abspath(os.path.join(__file__, + os.pardir, os.pardir, os.pardir, os.pardir, os.pardir)) +-BPFTOOL_DIR = os.path.join(LINUX_ROOT, 'tools/bpf/bpftool') ++BPFTOOL_DIR = os.getenv('BPFTOOL_DIR', ++ os.path.join(LINUX_ROOT, 'tools/bpf/bpftool')) ++BPFTOOL_BASHCOMP_DIR = os.getenv('BPFTOOL_BASHCOMP_DIR', ++ os.path.join(BPFTOOL_DIR, 'bash-completion')) ++BPFTOOL_DOC_DIR = os.getenv('BPFTOOL_DOC_DIR', ++ os.path.join(BPFTOOL_DIR, 'Documentation')) ++INCLUDE_DIR = os.getenv('INCLUDE_DIR', ++ os.path.join(LINUX_ROOT, 'tools/include')) ++ + retval = 0 + + class BlockParser(object): +@@ -300,7 +308,7 @@ class ManSubstitutionsExtractor(SourceFi + """ + An extractor for substitutions.rst + """ +- filename = os.path.join(BPFTOOL_DIR, 'Documentation/substitutions.rst') ++ filename = os.path.join(BPFTOOL_DOC_DIR, 'substitutions.rst') + + def get_common_options(self): + """ +@@ -393,7 +401,7 @@ class BpfHeaderExtractor(FileExtractor): + """ + An extractor for the UAPI BPF header. + """ +- filename = os.path.join(LINUX_ROOT, 'tools/include/uapi/linux/bpf.h') ++ filename = os.path.join(INCLUDE_DIR, 'uapi/linux/bpf.h') + + def get_prog_types(self): + return self.get_enum('bpf_prog_type') +@@ -417,7 +425,7 @@ class ManProgExtractor(ManPageExtractor) + """ + An extractor for bpftool-prog.rst. + """ +- filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-prog.rst') ++ filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-prog.rst') + + def get_attach_types(self): + return self.get_rst_list('ATTACH_TYPE') +@@ -426,7 +434,7 @@ class ManMapExtractor(ManPageExtractor): + """ + An extractor for bpftool-map.rst. + """ +- filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-map.rst') ++ filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-map.rst') + + def get_map_types(self): + return self.get_rst_list('TYPE') +@@ -435,7 +443,7 @@ class ManCgroupExtractor(ManPageExtracto + """ + An extractor for bpftool-cgroup.rst. + """ +- filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-cgroup.rst') ++ filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-cgroup.rst') + + def get_attach_types(self): + return self.get_rst_list('ATTACH_TYPE') +@@ -454,7 +462,7 @@ class BashcompExtractor(FileExtractor): + """ + An extractor for bpftool's bash completion file. 
+ """ +- filename = os.path.join(BPFTOOL_DIR, 'bash-completion/bpftool') ++ filename = os.path.join(BPFTOOL_BASHCOMP_DIR, 'bpftool') + + def get_prog_attach_types(self): + return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES') +@@ -605,7 +613,7 @@ def main(): + help_cmd_options = source_info.get_options() + source_info.close() + +- man_cmd_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool-' + cmd + '.rst')) ++ man_cmd_info = ManGenericExtractor(os.path.join(BPFTOOL_DOC_DIR, 'bpftool-' + cmd + '.rst')) + man_cmd_options = man_cmd_info.get_options() + man_cmd_info.close() + +@@ -616,7 +624,7 @@ def main(): + help_main_options = source_main_info.get_options() + source_main_info.close() + +- man_main_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool.rst')) ++ man_main_info = ManGenericExtractor(os.path.join(BPFTOOL_DOC_DIR, 'bpftool.rst')) + man_main_options = man_main_info.get_options() + man_main_info.close() + diff --git a/patches.suse/selftests-bpf-Convert-legacy-prog-load-APIs-to-bpf_p.patch b/patches.suse/selftests-bpf-Convert-legacy-prog-load-APIs-to-bpf_p.patch new file mode 100644 index 0000000..a1cc626 --- /dev/null +++ b/patches.suse/selftests-bpf-Convert-legacy-prog-load-APIs-to-bpf_p.patch @@ -0,0 +1,433 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 15:08:42 -0700 +Subject: selftests/bpf: Convert legacy prog load APIs to bpf_prog_load() +Patch-mainline: v5.17-rc1 +Git-commit: d8e86407e5fc6c3da1e336f89bd3e9bbc1c0cf60 +References: jsc#PED-1368 + +Convert all the uses of legacy low-level BPF program loading APIs +(mostly bpf_load_program_xattr(), but also some bpf_verify_program()) to +bpf_prog_load() uses. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211103220845.2676888-10-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/align.c | 11 ++ + tools/testing/selftests/bpf/prog_tests/fexit_stress.c | 33 ++++---- + tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c | 14 +-- + tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c | 29 +++---- + tools/testing/selftests/bpf/prog_tests/sockopt.c | 19 ++--- + tools/testing/selftests/bpf/test_lru_map.c | 9 -- + tools/testing/selftests/bpf/test_sock.c | 19 ++--- + tools/testing/selftests/bpf/test_sock_addr.c | 13 +-- + tools/testing/selftests/bpf/test_sysctl.c | 22 ++--- + tools/testing/selftests/bpf/test_verifier.c | 38 +++++----- + 10 files changed, 97 insertions(+), 110 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/align.c ++++ b/tools/testing/selftests/bpf/prog_tests/align.c +@@ -594,6 +594,12 @@ static int do_test_single(struct bpf_ali + struct bpf_insn *prog = test->insns; + int prog_type = test->prog_type; + char bpf_vlog_copy[32768]; ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, ++ .prog_flags = BPF_F_STRICT_ALIGNMENT, ++ .log_buf = bpf_vlog, ++ .log_size = sizeof(bpf_vlog), ++ .log_level = 2, ++ ); + const char *line_ptr; + int cur_line = -1; + int prog_len, i; +@@ -601,9 +607,8 @@ static int do_test_single(struct bpf_ali + int ret; + + prog_len = probe_filter_length(prog); +- fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, +- prog, prog_len, BPF_F_STRICT_ALIGNMENT, +- "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2); ++ fd_prog = bpf_prog_load(prog_type ? 
: BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", ++ prog, prog_len, &opts); + if (fd_prog < 0 && test->result != REJECT) { + printf("Failed to load program.\n"); + printf("%s", bpf_vlog); +--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c ++++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +@@ -20,34 +20,33 @@ void test_fexit_stress(void) + BPF_EXIT_INSN(), + }; + +- struct bpf_load_program_attr load_attr = { +- .prog_type = BPF_PROG_TYPE_TRACING, +- .license = "GPL", +- .insns = trace_program, +- .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn), ++ LIBBPF_OPTS(bpf_prog_load_opts, trace_opts, + .expected_attach_type = BPF_TRACE_FEXIT, +- }; ++ .log_buf = error, ++ .log_size = sizeof(error), ++ ); + + const struct bpf_insn skb_program[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + +- struct bpf_load_program_attr skb_load_attr = { +- .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, +- .license = "GPL", +- .insns = skb_program, +- .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn), +- }; ++ LIBBPF_OPTS(bpf_prog_load_opts, skb_opts, ++ .log_buf = error, ++ .log_size = sizeof(error), ++ ); + + err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1", +- load_attr.expected_attach_type); ++ trace_opts.expected_attach_type); + if (CHECK(err <= 0, "find_vmlinux_btf_id", "failed: %d\n", err)) + goto out; +- load_attr.attach_btf_id = err; ++ trace_opts.attach_btf_id = err; + + for (i = 0; i < CNT; i++) { +- fexit_fd[i] = bpf_load_program_xattr(&load_attr, error, sizeof(error)); ++ fexit_fd[i] = bpf_prog_load(BPF_PROG_TYPE_TRACING, NULL, "GPL", ++ trace_program, ++ sizeof(trace_program) / sizeof(struct bpf_insn), ++ &trace_opts); + if (CHECK(fexit_fd[i] < 0, "fexit loaded", + "failed: %d errno %d\n", fexit_fd[i], errno)) + goto out; +@@ -57,7 +56,9 @@ void test_fexit_stress(void) + goto out; + } + +- filter_fd = bpf_load_program_xattr(&skb_load_attr, error, sizeof(error)); ++ filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", ++ skb_program, sizeof(skb_program) / sizeof(struct bpf_insn), ++ &skb_opts); + if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n", + filter_fd, errno)) + goto out; +--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c ++++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c +@@ -18,15 +18,15 @@ void test_raw_tp_writable_reject_nbd_inv + BPF_EXIT_INSN(), + }; + +- struct bpf_load_program_attr load_attr = { +- .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, +- .license = "GPL v2", +- .insns = program, +- .insns_cnt = sizeof(program) / sizeof(struct bpf_insn), ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, + .log_level = 2, +- }; ++ .log_buf = error, ++ .log_size = sizeof(error), ++ ); + +- bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error)); ++ bpf_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, NULL, "GPL v2", ++ program, sizeof(program) / sizeof(struct bpf_insn), ++ &opts); + if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable load", + "failed: %d errno %d\n", bpf_fd, errno)) + return; +--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c ++++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c +@@ -17,15 +17,15 @@ void serial_test_raw_tp_writable_test_ru + BPF_EXIT_INSN(), + }; + +- struct bpf_load_program_attr load_attr = { +- .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, +- .license = "GPL v2", +- .insns = trace_program, +- .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn), 
++ LIBBPF_OPTS(bpf_prog_load_opts, trace_opts, + .log_level = 2, +- }; ++ .log_buf = error, ++ .log_size = sizeof(error), ++ ); + +- int bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error)); ++ int bpf_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, NULL, "GPL v2", ++ trace_program, sizeof(trace_program) / sizeof(struct bpf_insn), ++ &trace_opts); + if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable loaded", + "failed: %d errno %d\n", bpf_fd, errno)) + return; +@@ -35,15 +35,14 @@ void serial_test_raw_tp_writable_test_ru + BPF_EXIT_INSN(), + }; + +- struct bpf_load_program_attr skb_load_attr = { +- .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, +- .license = "GPL v2", +- .insns = skb_program, +- .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn), +- }; ++ LIBBPF_OPTS(bpf_prog_load_opts, skb_opts, ++ .log_buf = error, ++ .log_size = sizeof(error), ++ ); + +- int filter_fd = +- bpf_load_program_xattr(&skb_load_attr, error, sizeof(error)); ++ int filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL v2", ++ skb_program, sizeof(skb_program) / sizeof(struct bpf_insn), ++ &skb_opts); + if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n", + filter_fd, errno)) + goto out_bpffd; +--- a/tools/testing/selftests/bpf/prog_tests/sockopt.c ++++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c +@@ -852,22 +852,21 @@ static struct sockopt_test { + static int load_prog(const struct bpf_insn *insns, + enum bpf_attach_type expected_attach_type) + { +- struct bpf_load_program_attr attr = { +- .prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT, ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, + .expected_attach_type = expected_attach_type, +- .insns = insns, +- .license = "GPL", + .log_level = 2, +- }; +- int fd; ++ .log_buf = bpf_log_buf, ++ .log_size = sizeof(bpf_log_buf), ++ ); ++ int fd, insns_cnt = 0; + + for (; +- insns[attr.insns_cnt].code != (BPF_JMP | BPF_EXIT); +- attr.insns_cnt++) { ++ insns[insns_cnt].code != (BPF_JMP | BPF_EXIT); ++ insns_cnt++) { + } +- attr.insns_cnt++; ++ insns_cnt++; + +- fd = bpf_load_program_xattr(&attr, bpf_log_buf, sizeof(bpf_log_buf)); ++ fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCKOPT, NULL, "GPL", insns, insns_cnt, &opts); + if (verbose && fd < 0) + fprintf(stderr, "%s\n", bpf_log_buf); + +--- a/tools/testing/selftests/bpf/test_lru_map.c ++++ b/tools/testing/selftests/bpf/test_lru_map.c +@@ -42,7 +42,6 @@ static int create_map(int map_type, int + static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key, + void *value) + { +- struct bpf_load_program_attr prog; + struct bpf_create_map_attr map; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_9, 0, 0), +@@ -76,13 +75,7 @@ static int bpf_map_lookup_elem_with_ref_ + + insns[0].imm = mfd; + +- memset(&prog, 0, sizeof(prog)); +- prog.prog_type = BPF_PROG_TYPE_SCHED_CLS; +- prog.insns = insns; +- prog.insns_cnt = ARRAY_SIZE(insns); +- prog.license = "GPL"; +- +- pfd = bpf_load_program_xattr(&prog, NULL, 0); ++ pfd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, NULL, "GPL", insns, ARRAY_SIZE(insns), NULL); + if (pfd < 0) { + close(mfd); + return -1; +--- a/tools/testing/selftests/bpf/test_sock.c ++++ b/tools/testing/selftests/bpf/test_sock.c +@@ -328,18 +328,17 @@ static size_t probe_prog_length(const st + static int load_sock_prog(const struct bpf_insn *prog, + enum bpf_attach_type attach_type) + { +- struct bpf_load_program_attr attr; +- int ret; ++ LIBBPF_OPTS(bpf_prog_load_opts, opts); ++ int ret, insn_cnt; + +- memset(&attr, 0, sizeof(struct 
bpf_load_program_attr)); +- attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK; +- attr.expected_attach_type = attach_type; +- attr.insns = prog; +- attr.insns_cnt = probe_prog_length(attr.insns); +- attr.license = "GPL"; +- attr.log_level = 2; ++ insn_cnt = probe_prog_length(prog); + +- ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE); ++ opts.expected_attach_type = attach_type; ++ opts.log_buf = bpf_log_buf; ++ opts.log_size = BPF_LOG_BUF_SIZE; ++ opts.log_level = 2; ++ ++ ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", prog, insn_cnt, &opts); + if (verbose && ret < 0) + fprintf(stderr, "%s\n", bpf_log_buf); + +--- a/tools/testing/selftests/bpf/test_sock_addr.c ++++ b/tools/testing/selftests/bpf/test_sock_addr.c +@@ -645,17 +645,14 @@ static int mk_sockaddr(int domain, const + static int load_insns(const struct sock_addr_test *test, + const struct bpf_insn *insns, size_t insns_cnt) + { +- struct bpf_load_program_attr load_attr; ++ LIBBPF_OPTS(bpf_prog_load_opts, opts); + int ret; + +- memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); +- load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; +- load_attr.expected_attach_type = test->expected_attach_type; +- load_attr.insns = insns; +- load_attr.insns_cnt = insns_cnt; +- load_attr.license = "GPL"; ++ opts.expected_attach_type = test->expected_attach_type; ++ opts.log_buf = bpf_log_buf; ++ opts.log_size = BPF_LOG_BUF_SIZE; + +- ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE); ++ ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, NULL, "GPL", insns, insns_cnt, &opts); + if (ret < 0 && test->expected_result != LOAD_REJECT) { + log_err(">>> Loading program error.\n" + ">>> Verifier output:\n%s\n-------\n", bpf_log_buf); +--- a/tools/testing/selftests/bpf/test_sysctl.c ++++ b/tools/testing/selftests/bpf/test_sysctl.c +@@ -1435,14 +1435,10 @@ static int load_sysctl_prog_insns(struct + const char *sysctl_path) + { + struct bpf_insn *prog = test->insns; +- struct bpf_load_program_attr attr; +- int ret; ++ LIBBPF_OPTS(bpf_prog_load_opts, opts); ++ int ret, insn_cnt; + +- memset(&attr, 0, sizeof(struct bpf_load_program_attr)); +- attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL; +- attr.insns = prog; +- attr.insns_cnt = probe_prog_length(attr.insns); +- attr.license = "GPL"; ++ insn_cnt = probe_prog_length(prog); + + if (test->fixup_value_insn) { + char buf[128]; +@@ -1465,7 +1461,10 @@ static int load_sysctl_prog_insns(struct + return -1; + } + +- ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE); ++ opts.log_buf = bpf_log_buf; ++ opts.log_size = BPF_LOG_BUF_SIZE; ++ ++ ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SYSCTL, NULL, "GPL", prog, insn_cnt, &opts); + if (ret < 0 && test->result != LOAD_REJECT) { + log_err(">>> Loading program error.\n" + ">>> Verifier output:\n%s\n-------\n", bpf_log_buf); +@@ -1476,15 +1475,10 @@ static int load_sysctl_prog_insns(struct + + static int load_sysctl_prog_file(struct sysctl_test *test) + { +- struct bpf_prog_load_attr attr; + struct bpf_object *obj; + int prog_fd; + +- memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); +- attr.file = test->prog_file; +- attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL; +- +- if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) { ++ if (bpf_prog_test_load(test->prog_file, BPF_PROG_TYPE_CGROUP_SYSCTL, &obj, &prog_fd)) { + if (test->result != LOAD_REJECT) + log_err(">>> Loading program (%s) error.\n", + test->prog_file); +--- a/tools/testing/selftests/bpf/test_verifier.c ++++ 
b/tools/testing/selftests/bpf/test_verifier.c +@@ -495,8 +495,7 @@ static int create_prog_dummy_simple(enum + BPF_EXIT_INSN(), + }; + +- return bpf_load_program(prog_type, prog, +- ARRAY_SIZE(prog), "GPL", 0, NULL, 0); ++ return bpf_prog_load(prog_type, NULL, "GPL", prog, ARRAY_SIZE(prog), NULL); + } + + static int create_prog_dummy_loop(enum bpf_prog_type prog_type, int mfd, +@@ -511,8 +510,7 @@ static int create_prog_dummy_loop(enum b + BPF_EXIT_INSN(), + }; + +- return bpf_load_program(prog_type, prog, +- ARRAY_SIZE(prog), "GPL", 0, NULL, 0); ++ return bpf_prog_load(prog_type, NULL, "GPL", prog, ARRAY_SIZE(prog), NULL); + } + + static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem, +@@ -1085,7 +1083,7 @@ static void do_test_single(struct bpf_te + int fd_prog, expected_ret, alignment_prevented_execution; + int prog_len, prog_type = test->prog_type; + struct bpf_insn *prog = test->insns; +- struct bpf_load_program_attr attr; ++ LIBBPF_OPTS(bpf_prog_load_opts, opts); + int run_errs, run_successes; + int map_fds[MAX_NR_MAPS]; + const char *expected_err; +@@ -1125,32 +1123,34 @@ static void do_test_single(struct bpf_te + test->result_unpriv : test->result; + expected_err = unpriv && test->errstr_unpriv ? + test->errstr_unpriv : test->errstr; +- memset(&attr, 0, sizeof(attr)); +- attr.prog_type = prog_type; +- attr.expected_attach_type = test->expected_attach_type; +- attr.insns = prog; +- attr.insns_cnt = prog_len; +- attr.license = "GPL"; ++ ++ opts.expected_attach_type = test->expected_attach_type; + if (verbose) +- attr.log_level = 1; ++ opts.log_level = 1; + else if (expected_ret == VERBOSE_ACCEPT) +- attr.log_level = 2; ++ opts.log_level = 2; + else +- attr.log_level = 4; +- attr.prog_flags = pflags; ++ opts.log_level = 4; ++ opts.prog_flags = pflags; + + if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) { +- attr.attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc, +- attr.expected_attach_type); +- if (attr.attach_btf_id < 0) { ++ int attach_btf_id; ++ ++ attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc, ++ opts.expected_attach_type); ++ if (attach_btf_id < 0) { + printf("FAIL\nFailed to find BTF ID for '%s'!\n", + test->kfunc); + (*errors)++; + return; + } ++ ++ opts.attach_btf_id = attach_btf_id; + } + +- fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog)); ++ opts.log_buf = bpf_vlog; ++ opts.log_size = sizeof(bpf_vlog); ++ fd_prog = bpf_prog_load(prog_type, NULL, "GPL", prog, prog_len, &opts); + saved_errno = errno; + + /* BPF_PROG_TYPE_TRACING requires more setup and diff --git a/patches.suse/selftests-bpf-Convert-map_ptr_kern-test-to-use-light.patch b/patches.suse/selftests-bpf-Convert-map_ptr_kern-test-to-use-light.patch new file mode 100644 index 0000000..8f6b08b --- /dev/null +++ b/patches.suse/selftests-bpf-Convert-map_ptr_kern-test-to-use-light.patch @@ -0,0 +1,79 @@ +From: Alexei Starovoitov +Date: Wed, 1 Dec 2021 10:10:37 -0800 +Subject: selftests/bpf: Convert map_ptr_kern test to use light skeleton. +Patch-mainline: v5.17-rc1 +Git-commit: 650c9dbd101ba7d7180f4e77deb1c273f4ea5ca3 +References: jsc#PED-1368 + +To exercise CO-RE in the kernel further convert map_ptr_kern +test to light skeleton. 
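+
+For context: listing a program in LSKELS makes the build generate a
+*.lskel.h light skeleton (bpftool's light-skeleton mode, gen skeleton
+-L), which exposes maps and programs as plain struct fields rather than
+through accessor functions. A rough sketch of the resulting usage
+pattern (the run step is hypothetical):
+
+  struct map_ptr_kern_lskel *skel;
+
+  skel = map_ptr_kern_lskel__open();
+  if (!skel)
+          return;
+  /* plain field, no bpf_map__set_max_entries() accessor */
+  skel->maps.m_ringbuf.max_entries = getpagesize();
+  if (map_ptr_kern_lskel__load(skel))
+          goto cleanup;
+  /* the prog FD is a plain field too, no bpf_program__fd() */
+  run_test(skel->progs.cg_skb.prog_fd);
+cleanup:
+  map_ptr_kern_lskel__destroy(skel);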
+ +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-15-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/Makefile | 3 ++- + tools/testing/selftests/bpf/prog_tests/map_ptr.c | 16 +++++++--------- + 2 files changed, 9 insertions(+), 10 deletions(-) + +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -325,7 +325,8 @@ LINKED_SKELS := test_static_linked.skel. + linked_vars.skel.h linked_maps.skel.h + + LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \ +- test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c ++ test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \ ++ map_ptr_kern.c + # Generate both light skeleton and libbpf skeleton for these + LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test_subprog.c + SKEL_BLACKLIST += $$(LSKELS) +--- a/tools/testing/selftests/bpf/prog_tests/map_ptr.c ++++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c +@@ -4,31 +4,29 @@ + #include + #include + +-#include "map_ptr_kern.skel.h" ++#include "map_ptr_kern.lskel.h" + + void test_map_ptr(void) + { +- struct map_ptr_kern *skel; ++ struct map_ptr_kern_lskel *skel; + __u32 duration = 0, retval; + char buf[128]; + int err; + int page_size = getpagesize(); + +- skel = map_ptr_kern__open(); ++ skel = map_ptr_kern_lskel__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + +- err = bpf_map__set_max_entries(skel->maps.m_ringbuf, page_size); +- if (!ASSERT_OK(err, "bpf_map__set_max_entries")) +- goto cleanup; ++ skel->maps.m_ringbuf.max_entries = page_size; + +- err = map_ptr_kern__load(skel); ++ err = map_ptr_kern_lskel__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + skel->bss->page_size = page_size; + +- err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4, ++ err = bpf_prog_test_run(skel->progs.cg_skb.prog_fd, 1, &pkt_v4, + sizeof(pkt_v4), buf, NULL, &retval, NULL); + + if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno)) +@@ -39,5 +37,5 @@ void test_map_ptr(void) + goto cleanup; + + cleanup: +- map_ptr_kern__destroy(skel); ++ map_ptr_kern_lskel__destroy(skel); + } diff --git a/patches.suse/selftests-bpf-Correct-the-INDEX-address-in-vmtest.sh.patch b/patches.suse/selftests-bpf-Correct-the-INDEX-address-in-vmtest.sh.patch new file mode 100644 index 0000000..7233773 --- /dev/null +++ b/patches.suse/selftests-bpf-Correct-the-INDEX-address-in-vmtest.sh.patch @@ -0,0 +1,31 @@ +From: Pu Lehui +Date: Mon, 20 Dec 2021 05:08:03 +0000 +Subject: selftests/bpf: Correct the INDEX address in vmtest.sh +Patch-mainline: v5.17-rc1 +Git-commit: 426b87b111b0523f957354bc97ec7eb16e8be1e2 +References: jsc#PED-1368 + +Migration of vmtest to libbpf/ci will change the address +of INDEX in vmtest.sh, which will cause vmtest.sh to not +work due to the failure of rootfs fetching. 
+ +Signed-off-by: Pu Lehui +Signed-off-by: Andrii Nakryiko +Tested-by: Lorenzo Fontana +Link: https://lore.kernel.org/bpf/20211220050803.2670677-1-pulehui@huawei.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/vmtest.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/bpf/vmtest.sh ++++ b/tools/testing/selftests/bpf/vmtest.sh +@@ -32,7 +32,7 @@ ROOTFS_IMAGE="root.img" + OUTPUT_DIR="$HOME/.bpf_selftests" + KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/config-latest.${ARCH}" + KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/config-latest.${ARCH}" +-INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX" ++INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX" + NUM_COMPILE_JOBS="$(nproc)" + LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")" + LOG_FILE="${LOG_FILE_BASE}.log" diff --git a/patches.suse/selftests-bpf-Destroy-XDP-link-correctly.patch b/patches.suse/selftests-bpf-Destroy-XDP-link-correctly.patch new file mode 100644 index 0000000..76a8b8a --- /dev/null +++ b/patches.suse/selftests-bpf-Destroy-XDP-link-correctly.patch @@ -0,0 +1,32 @@ +From: Andrii Nakryiko +Date: Sun, 7 Nov 2021 08:55:20 -0800 +Subject: selftests/bpf: Destroy XDP link correctly +Patch-mainline: v5.17-rc1 +Git-commit: f91231eeeed752119f49eb6620cae44ec745a007 +References: jsc#PED-1368 + +bpf_link__detach() was confused with bpf_link__destroy() and leaves +leaked FD in the process. Fix the problem. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Reviewed-by: Hengqi Chen +Link: https://lore.kernel.org/bpf/20211107165521.9240-9-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c ++++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c +@@ -204,8 +204,8 @@ static int pass_ack(struct migrate_reuse + { + int err; + +- err = bpf_link__detach(test_case->link); +- if (!ASSERT_OK(err, "bpf_link__detach")) ++ err = bpf_link__destroy(test_case->link); ++ if (!ASSERT_OK(err, "bpf_link__destroy")) + return -1; + + test_case->link = NULL; diff --git a/patches.suse/selftests-bpf-Don-t-rely-on-preserving-volatile-in-P.patch b/patches.suse/selftests-bpf-Don-t-rely-on-preserving-volatile-in-P.patch new file mode 100644 index 0000000..d10aa6e --- /dev/null +++ b/patches.suse/selftests-bpf-Don-t-rely-on-preserving-volatile-in-P.patch @@ -0,0 +1,41 @@ +From: Andrii Nakryiko +Date: Thu, 6 Jan 2022 12:51:56 -0800 +Subject: selftests/bpf: Don't rely on preserving volatile in PT_REGS macros in + loop3 +Patch-mainline: v5.17-rc1 +Git-commit: 70bc793382a0e37ba4e35e4d1a317b280b829a44 +References: jsc#PED-1368 + +PT_REGS*() macro on some architectures force-cast struct pt_regs to +other types (user_pt_regs, etc) and might drop volatile modifiers, if any. +Volatile isn't really required as pt_regs value isn't supposed to change +during the BPF program run, so this is correct behavior. + +But progs/loop3.c relies on that volatile modifier to ensure that loop +is preserved. Fix loop3.c by declaring i and sum variables as volatile +instead. It preserves the loop and makes the test pass on all +architectures (including s390x which is currently broken). 
+ +Fixes: 3cc31d794097 ("libbpf: Normalize PT_REGS_xxx() macro definitions") +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20220106205156.955373-1-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/progs/loop3.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/bpf/progs/loop3.c ++++ b/tools/testing/selftests/bpf/progs/loop3.c +@@ -12,9 +12,9 @@ + char _license[] SEC("license") = "GPL"; + + SEC("raw_tracepoint/consume_skb") +-int while_true(volatile struct pt_regs* ctx) ++int while_true(struct pt_regs *ctx) + { +- __u64 i = 0, sum = 0; ++ volatile __u64 i = 0, sum = 0; + do { + i++; + sum += PT_REGS_RC(ctx); diff --git a/patches.suse/selftests-bpf-Enable-cross-building-with-clang.patch b/patches.suse/selftests-bpf-Enable-cross-building-with-clang.patch new file mode 100644 index 0000000..8ebe30d --- /dev/null +++ b/patches.suse/selftests-bpf-Enable-cross-building-with-clang.patch @@ -0,0 +1,59 @@ +From: Jean-Philippe Brucker +Date: Thu, 16 Dec 2021 16:38:43 +0000 +Subject: selftests/bpf: Enable cross-building with clang +Patch-mainline: v5.17-rc1 +Git-commit: ea79020a2d9eea62b12d90f0c11b7d70fcadc172 +References: jsc#PED-1368 + +Cross building using clang requires passing the "-target" flag rather +than using the CROSS_COMPILE prefix. Makefile.include transforms +CROSS_COMPILE into CLANG_CROSS_FLAGS. Clear CROSS_COMPILE for bpftool +and the host libbpf, and use the clang flags for urandom_read and bench. + +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Andrii Nakryiko +Acked-by: Quentin Monnet +Link: https://lore.kernel.org/bpf/20211216163842.829836-7-jean-philippe@linaro.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/Makefile | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -170,7 +170,7 @@ $(OUTPUT)/%:%.c + + $(OUTPUT)/urandom_read: urandom_read.c + $(call msg,BINARY,,$@) +- $(Q)$(CC) $(LDFLAGS) $< $(LDLIBS) -Wl,--build-id=sha1 -o $@ ++ $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $< $(LDLIBS) -Wl,--build-id=sha1 -o $@ + + $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) + $(call msg,MOD,,$@) +@@ -217,7 +217,7 @@ BPFTOOL ?= $(DEFAULT_BPFTOOL) + $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ + $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool + $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ +- CC=$(HOSTCC) LD=$(HOSTLD) \ ++ ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) \ + EXTRA_CFLAGS='-g -O0' \ + OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ + LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ +@@ -248,7 +248,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ + $(APIDIR)/linux/bpf.h \ + | $(HOST_BUILD_DIR)/libbpf + $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ +- EXTRA_CFLAGS='-g -O0' \ ++ EXTRA_CFLAGS='-g -O0' ARCH= CROSS_COMPILE= \ + OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \ + DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers + endif +@@ -551,7 +551,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ + $(OUTPUT)/bench_bpf_loop.o \ + $(OUTPUT)/bench_strncmp.o + $(call msg,BINARY,,$@) +- $(Q)$(CC) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ ++ $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ + + EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ + prog_tests/tests.h map_tests/tests.h verifier/tests.h \ diff --git 
a/patches.suse/selftests-bpf-Fix-UBSan-complaint-about-signed-__int.patch b/patches.suse/selftests-bpf-Fix-UBSan-complaint-about-signed-__int.patch new file mode 100644 index 0000000..ac97a6d --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-UBSan-complaint-about-signed-__int.patch @@ -0,0 +1,30 @@ +From: Andrii Nakryiko +Date: Tue, 23 Nov 2021 16:23:19 -0800 +Subject: selftests/bpf: Fix UBSan complaint about signed __int128 overflow +Patch-mainline: v5.17-rc1 +Git-commit: 486e648cb2f170702fc05f777c7b6b3d8ec662ce +References: jsc#PED-1368 + +Test is using __int128 variable as unsigned and highest order bit can be +set to 1 after bit shift. Use unsigned __int128 explicitly and prevent +UBSan from complaining. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211124002325.1737739-8-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/btf_dump.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c +@@ -323,7 +323,7 @@ static void test_btf_dump_int_data(struc + char *str) + { + #ifdef __SIZEOF_INT128__ +- __int128 i = 0xffffffffffffffff; ++ unsigned __int128 i = 0xffffffffffffffff; + + /* this dance is required because we cannot directly initialize + * a 128-bit value to anything larger than a 64-bit value. diff --git a/patches.suse/selftests-bpf-Fix-a-compilation-warning.patch b/patches.suse/selftests-bpf-Fix-a-compilation-warning.patch new file mode 100644 index 0000000..5a21609 --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-a-compilation-warning.patch @@ -0,0 +1,46 @@ +From: Yonghong Song +Date: Wed, 8 Dec 2021 21:04:03 -0800 +Subject: selftests/bpf: Fix a compilation warning +Patch-mainline: v5.17-rc1 +Git-commit: b540358e6c4d86eb450f3539aea198653e656641 +References: jsc#PED-1368 + +The following warning is triggered when I used clang compiler +to build the selftest. + + /.../prog_tests/btf_dedup_split.c:368:6: warning: variable 'btf2' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] + if (!ASSERT_OK(err, "btf_dedup")) + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ + /.../prog_tests/btf_dedup_split.c:424:12: note: uninitialized use occurs here + btf__free(btf2); + ^~~~ + /.../prog_tests/btf_dedup_split.c:368:2: note: remove the 'if' if its condition is always false + if (!ASSERT_OK(err, "btf_dedup")) + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + /.../prog_tests/btf_dedup_split.c:343:25: note: initialize the variable 'btf2' to silence this warning + struct btf *btf1, *btf2; + ^ + = NULL + +Initialize local variable btf2 = NULL and the warning is gone. + +Fixes: 9a49afe6f5a5 ("selftests/bpf: Add btf_dedup case with duplicated structs within CU") +Signed-off-by: Yonghong Song +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211209050403.1770836-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c +@@ -340,7 +340,7 @@ static void btf_add_dup_struct_in_cu(str + + static void test_split_dup_struct_in_cu() + { +- struct btf *btf1, *btf2; ++ struct btf *btf1, *btf2 = NULL; + int err; + + /* generate the base data.. 
*/ diff --git a/patches.suse/selftests-bpf-Fix-a-tautological-constant-out-of-ran.patch b/patches.suse/selftests-bpf-Fix-a-tautological-constant-out-of-ran.patch new file mode 100644 index 0000000..197bb71 --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-a-tautological-constant-out-of-ran.patch @@ -0,0 +1,75 @@ +From: Yonghong Song +Date: Fri, 12 Nov 2021 12:48:38 -0800 +Subject: selftests/bpf: Fix a tautological-constant-out-of-range-compare + compiler warning +Patch-mainline: v5.17-rc1 +Git-commit: 325d956d67178af92b5b12ff950a2f93a433f2c4 +References: jsc#PED-1368 + +When using clang to build selftests with LLVM=1 in make commandline, +I hit the following compiler warning: + + benchs/bench_bloom_filter_map.c:84:46: warning: result of comparison of constant 256 + with expression of type '__u8' (aka 'unsigned char') is always false + [-Wtautological-constant-out-of-range-compare] + if (args.value_size < 2 || args.value_size > 256) { + ~~~~~~~~~~~~~~~ ^ ~~~ + +The reason is arg.vaue_size has type __u8, so comparison "args.value_size > 256" +is always false. + +This patch fixed the issue by doing proper comparison before assigning the +value to args.value_size. The patch also fixed the same issue in two +other places. + +Signed-off-by: Yonghong Song +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211112204838.3579953-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c | 17 +++++++----- + 1 file changed, 11 insertions(+), 6 deletions(-) + +--- a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c ++++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c +@@ -63,29 +63,34 @@ static const struct argp_option opts[] = + + static error_t parse_arg(int key, char *arg, struct argp_state *state) + { ++ long ret; ++ + switch (key) { + case ARG_NR_ENTRIES: +- args.nr_entries = strtol(arg, NULL, 10); +- if (args.nr_entries == 0) { ++ ret = strtol(arg, NULL, 10); ++ if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "Invalid nr_entries count."); + argp_usage(state); + } ++ args.nr_entries = ret; + break; + case ARG_NR_HASH_FUNCS: +- args.nr_hash_funcs = strtol(arg, NULL, 10); +- if (args.nr_hash_funcs == 0 || args.nr_hash_funcs > 15) { ++ ret = strtol(arg, NULL, 10); ++ if (ret < 1 || ret > 15) { + fprintf(stderr, + "The bloom filter must use 1 to 15 hash functions."); + argp_usage(state); + } ++ args.nr_hash_funcs = ret; + break; + case ARG_VALUE_SIZE: +- args.value_size = strtol(arg, NULL, 10); +- if (args.value_size < 2 || args.value_size > 256) { ++ ret = strtol(arg, NULL, 10); ++ if (ret < 2 || ret > 256) { + fprintf(stderr, + "Invalid value size. 
Must be between 2 and 256 bytes"); + argp_usage(state); + } ++ args.value_size = ret; + break; + default: + return ARGP_ERR_UNKNOWN; diff --git a/patches.suse/selftests-bpf-Fix-an-unused-but-set-variable-compile.patch b/patches.suse/selftests-bpf-Fix-an-unused-but-set-variable-compile.patch new file mode 100644 index 0000000..fcd5e33 --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-an-unused-but-set-variable-compile.patch @@ -0,0 +1,42 @@ +From: Yonghong Song +Date: Fri, 12 Nov 2021 12:48:33 -0800 +Subject: selftests/bpf: Fix an unused-but-set-variable compiler warning +Patch-mainline: v5.17-rc1 +Git-commit: 21c6ec3d5275a77348b1af0e78cbbed0ee1558d4 +References: jsc#PED-1368 + +When using clang to build selftests with LLVM=1 in make commandline, +I hit the following compiler warning: + xdpxceiver.c:747:6: warning: variable 'total' set but not used [-Wunused-but-set-variable] + u32 total = 0; + ^ + +This patch fixed the issue by removing that declaration and its +assocatied unused operation. + +Signed-off-by: Yonghong Song +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211112204833.3579457-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/xdpxceiver.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/tools/testing/selftests/bpf/xdpxceiver.c ++++ b/tools/testing/selftests/bpf/xdpxceiver.c +@@ -744,7 +744,6 @@ static void receive_pkts(struct pkt_stre + struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream); + struct xsk_umem_info *umem = xsk->umem; + u32 idx_rx = 0, idx_fq = 0, rcvd, i; +- u32 total = 0; + int ret; + + while (pkt) { +@@ -799,7 +798,6 @@ static void receive_pkts(struct pkt_stre + + pthread_mutex_lock(&pacing_mutex); + pkts_in_flight -= rcvd; +- total += rcvd; + if (pkts_in_flight < umem->num_frames) + pthread_cond_signal(&pacing_cond); + pthread_mutex_unlock(&pacing_mutex); diff --git a/patches.suse/selftests-bpf-Fix-bpf_object-leak-in-skb_ctx-selftes.patch b/patches.suse/selftests-bpf-Fix-bpf_object-leak-in-skb_ctx-selftes.patch new file mode 100644 index 0000000..9a95809 --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-bpf_object-leak-in-skb_ctx-selftes.patch @@ -0,0 +1,29 @@ +From: Andrii Nakryiko +Date: Sun, 7 Nov 2021 08:55:21 -0800 +Subject: selftests/bpf: Fix bpf_object leak in skb_ctx selftest +Patch-mainline: v5.17-rc1 +Git-commit: 8c7a95520184b6677ca6075e12df9c208d57d088 +References: jsc#PED-1368 + +skb_ctx selftest didn't close bpf_object implicitly allocated by +bpf_prog_test_load() helper. Fix the problem by explicitly calling +bpf_object__close() at the end of the test. 
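+
+A condensed sketch of the complete load/teardown pattern (object file
+name and run step are hypothetical):
+
+  struct bpf_object *obj;
+  int err, prog_fd;
+
+  err = bpf_prog_test_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS,
+                           &obj, &prog_fd);
+  if (err)
+          return;
+  /* ... run prog_fd against the crafted test skb ... */
+  bpf_object__close(obj);  /* frees the object the helper allocated */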
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Reviewed-by: Hengqi Chen +Link: https://lore.kernel.org/bpf/20211107165521.9240-10-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/skb_ctx.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c ++++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +@@ -111,4 +111,6 @@ void test_skb_ctx(void) + "ctx_out_mark", + "skb->mark == %u, expected %d\n", + skb.mark, 10); ++ ++ bpf_object__close(obj); + } diff --git a/patches.suse/selftests-bpf-Fix-bpf_prog_test_load-logic-to-pass-e.patch b/patches.suse/selftests-bpf-Fix-bpf_prog_test_load-logic-to-pass-e.patch new file mode 100644 index 0000000..8c57471 --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-bpf_prog_test_load-logic-to-pass-e.patch @@ -0,0 +1,47 @@ +From: Andrii Nakryiko +Date: Wed, 10 Nov 2021 21:17:58 -0800 +Subject: selftests/bpf: Fix bpf_prog_test_load() logic to pass extra log level +Patch-mainline: v5.17-rc1 +Git-commit: 50dee7078b66d881c62f6177844d625f7ead6003 +References: jsc#PED-1368 + +After recent refactoring bpf_prog_test_load(), used across multiple +selftests, lost ability to specify extra log_level 1 or 2 (for -vv and +-vvv, respectively). Fix that problem by using bpf_object__load_xattr() +API that supports extra log_level flags. Also restore +BPF_F_TEST_RND_HI32 prog_flags by utilizing new bpf_program__set_extra_flags() +API. + +Fixes: f87c1930ac29 ("selftests/bpf: Merge test_stub.c into testing_helpers.c") +Reported-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211111051758.92283-3-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/testing_helpers.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/tools/testing/selftests/bpf/testing_helpers.c ++++ b/tools/testing/selftests/bpf/testing_helpers.c +@@ -88,6 +88,7 @@ int extra_prog_load_log_flags = 0; + int bpf_prog_test_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd) + { ++ struct bpf_object_load_attr attr = {}; + struct bpf_object *obj; + struct bpf_program *prog; + int err; +@@ -105,7 +106,11 @@ int bpf_prog_test_load(const char *file, + if (type != BPF_PROG_TYPE_UNSPEC) + bpf_program__set_type(prog, type); + +- err = bpf_object__load(obj); ++ bpf_program__set_extra_flags(prog, BPF_F_TEST_RND_HI32); ++ ++ attr.obj = obj; ++ attr.log_level = extra_prog_load_log_flags; ++ err = bpf_object__load_xattr(&attr); + if (err) + goto err_out; + diff --git a/patches.suse/selftests-bpf-Fix-checkpatch-error-on-empty-function.patch b/patches.suse/selftests-bpf-Fix-checkpatch-error-on-empty-function.patch new file mode 100644 index 0000000..b731e1d --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-checkpatch-error-on-empty-function.patch @@ -0,0 +1,296 @@ +From: Hou Tao +Date: Fri, 10 Dec 2021 22:16:50 +0800 +Subject: selftests/bpf: Fix checkpatch error on empty function parameter +Patch-mainline: v5.17-rc1 +Git-commit: 9a93bf3fda3d03762868b1424e898395ffc71575 +References: jsc#PED-1368 + +Fix checkpatch error: "ERROR: Bad function definition - void foo() +should probably be void foo(void)". 
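Background for the rule just quoted, as a standalone illustration (not taken from the patch): in C before C23, an empty parameter list declares a function with unspecified parameters rather than one taking no arguments, so the compiler cannot reject a call made with bogus arguments.

/* Illustration only. */
void setup_old() { }      /* parameters unspecified, not "none" */
void setup_new(void) { }  /* real prototype: exactly zero arguments */

int main(void)
{
	setup_old(42);    /* typically compiles without complaint under C17 */
	setup_new();      /* setup_new(42) would be a compile-time error */
	return 0;
}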
Most replacements are done by +the following command: + + sed -i 's#\([a-z]\)()$#\1(void)#g' testing/selftests/bpf/benchs/*.c + +Signed-off-by: Hou Tao +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211210141652.877186-3-houtao1@huawei.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/bench.c | 2 - + tools/testing/selftests/bpf/bench.h | 9 ++++--- + tools/testing/selftests/bpf/benchs/bench_count.c | 2 - + tools/testing/selftests/bpf/benchs/bench_rename.c | 16 ++++++------- + tools/testing/selftests/bpf/benchs/bench_ringbufs.c | 14 +++++------ + tools/testing/selftests/bpf/benchs/bench_trigger.c | 24 ++++++++++---------- + 6 files changed, 34 insertions(+), 33 deletions(-) + +--- a/tools/testing/selftests/bpf/bench.c ++++ b/tools/testing/selftests/bpf/bench.c +@@ -39,7 +39,7 @@ static int bump_memlock_rlimit(void) + return setrlimit(RLIMIT_MEMLOCK, &rlim_new); + } + +-void setup_libbpf() ++void setup_libbpf(void) + { + int err; + +--- a/tools/testing/selftests/bpf/bench.h ++++ b/tools/testing/selftests/bpf/bench.h +@@ -38,8 +38,8 @@ struct bench_res { + + struct bench { + const char *name; +- void (*validate)(); +- void (*setup)(); ++ void (*validate)(void); ++ void (*setup)(void); + void *(*producer_thread)(void *ctx); + void *(*consumer_thread)(void *ctx); + void (*measure)(struct bench_res* res); +@@ -54,7 +54,7 @@ struct counter { + extern struct env env; + extern const struct bench *bench; + +-void setup_libbpf(); ++void setup_libbpf(void); + void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns); + void hits_drops_report_final(struct bench_res res[], int res_cnt); + void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); +@@ -62,7 +62,8 @@ void false_hits_report_final(struct benc + void ops_report_progress(int iter, struct bench_res *res, long delta_ns); + void ops_report_final(struct bench_res res[], int res_cnt); + +-static inline __u64 get_time_ns() { ++static inline __u64 get_time_ns(void) ++{ + struct timespec t; + + clock_gettime(CLOCK_MONOTONIC, &t); +--- a/tools/testing/selftests/bpf/benchs/bench_count.c ++++ b/tools/testing/selftests/bpf/benchs/bench_count.c +@@ -36,7 +36,7 @@ static struct count_local_ctx { + struct counter *hits; + } count_local_ctx; + +-static void count_local_setup() ++static void count_local_setup(void) + { + struct count_local_ctx *ctx = &count_local_ctx; + +--- a/tools/testing/selftests/bpf/benchs/bench_rename.c ++++ b/tools/testing/selftests/bpf/benchs/bench_rename.c +@@ -11,7 +11,7 @@ static struct ctx { + int fd; + } ctx; + +-static void validate() ++static void validate(void) + { + if (env.producer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-producer!\n"); +@@ -43,7 +43,7 @@ static void measure(struct bench_res *re + res->hits = atomic_swap(&ctx.hits.value, 0); + } + +-static void setup_ctx() ++static void setup_ctx(void) + { + setup_libbpf(); + +@@ -71,36 +71,36 @@ static void attach_bpf(struct bpf_progra + } + } + +-static void setup_base() ++static void setup_base(void) + { + setup_ctx(); + } + +-static void setup_kprobe() ++static void setup_kprobe(void) + { + setup_ctx(); + attach_bpf(ctx.skel->progs.prog1); + } + +-static void setup_kretprobe() ++static void setup_kretprobe(void) + { + setup_ctx(); + attach_bpf(ctx.skel->progs.prog2); + } + +-static void setup_rawtp() ++static void setup_rawtp(void) + { + setup_ctx(); + attach_bpf(ctx.skel->progs.prog3); + } + +-static void setup_fentry() ++static void setup_fentry(void) + { + 
setup_ctx(); + attach_bpf(ctx.skel->progs.prog4); + } + +-static void setup_fexit() ++static void setup_fexit(void) + { + setup_ctx(); + attach_bpf(ctx.skel->progs.prog5); +--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c ++++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +@@ -88,12 +88,12 @@ const struct argp bench_ringbufs_argp = + + static struct counter buf_hits; + +-static inline void bufs_trigger_batch() ++static inline void bufs_trigger_batch(void) + { + (void)syscall(__NR_getpgid); + } + +-static void bufs_validate() ++static void bufs_validate(void) + { + if (env.consumer_cnt != 1) { + fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n"); +@@ -132,7 +132,7 @@ static void ringbuf_libbpf_measure(struc + res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); + } + +-static struct ringbuf_bench *ringbuf_setup_skeleton() ++static struct ringbuf_bench *ringbuf_setup_skeleton(void) + { + struct ringbuf_bench *skel; + +@@ -167,7 +167,7 @@ static int buf_process_sample(void *ctx, + return 0; + } + +-static void ringbuf_libbpf_setup() ++static void ringbuf_libbpf_setup(void) + { + struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx; + struct bpf_link *link; +@@ -223,7 +223,7 @@ static void ringbuf_custom_measure(struc + res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); + } + +-static void ringbuf_custom_setup() ++static void ringbuf_custom_setup(void) + { + struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx; + const size_t page_size = getpagesize(); +@@ -352,7 +352,7 @@ static void perfbuf_measure(struct bench + res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); + } + +-static struct perfbuf_bench *perfbuf_setup_skeleton() ++static struct perfbuf_bench *perfbuf_setup_skeleton(void) + { + struct perfbuf_bench *skel; + +@@ -390,7 +390,7 @@ perfbuf_process_sample_raw(void *input_c + return LIBBPF_PERF_EVENT_CONT; + } + +-static void perfbuf_libbpf_setup() ++static void perfbuf_libbpf_setup(void) + { + struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx; + struct perf_event_attr attr; +--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c ++++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c +@@ -11,7 +11,7 @@ static struct trigger_ctx { + + static struct counter base_hits; + +-static void trigger_validate() ++static void trigger_validate(void) + { + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); +@@ -45,7 +45,7 @@ static void trigger_measure(struct bench + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); + } + +-static void setup_ctx() ++static void setup_ctx(void) + { + setup_libbpf(); + +@@ -67,37 +67,37 @@ static void attach_bpf(struct bpf_progra + } + } + +-static void trigger_tp_setup() ++static void trigger_tp_setup(void) + { + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_tp); + } + +-static void trigger_rawtp_setup() ++static void trigger_rawtp_setup(void) + { + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_raw_tp); + } + +-static void trigger_kprobe_setup() ++static void trigger_kprobe_setup(void) + { + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_kprobe); + } + +-static void trigger_fentry_setup() ++static void trigger_fentry_setup(void) + { + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fentry); + } + +-static void trigger_fentry_sleep_setup() ++static void trigger_fentry_sleep_setup(void) + { + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep); + } + +-static void trigger_fmodret_setup() ++static void 
trigger_fmodret_setup(void) + { + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fmodret); +@@ -183,22 +183,22 @@ static void usetup(bool use_retprobe, bo + ctx.skel->links.bench_trigger_uprobe = link; + } + +-static void uprobe_setup_with_nop() ++static void uprobe_setup_with_nop(void) + { + usetup(false, true); + } + +-static void uretprobe_setup_with_nop() ++static void uretprobe_setup_with_nop(void) + { + usetup(true, true); + } + +-static void uprobe_setup_without_nop() ++static void uprobe_setup_without_nop(void) + { + usetup(false, false); + } + +-static void uretprobe_setup_without_nop() ++static void uretprobe_setup_without_nop(void) + { + usetup(true, false); + } diff --git a/patches.suse/selftests-bpf-Fix-memory-leaks-in-btf_type_c_dump-he.patch b/patches.suse/selftests-bpf-Fix-memory-leaks-in-btf_type_c_dump-he.patch new file mode 100644 index 0000000..a5c5735 --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-memory-leaks-in-btf_type_c_dump-he.patch @@ -0,0 +1,47 @@ +From: Andrii Nakryiko +Date: Sun, 7 Nov 2021 08:55:15 -0800 +Subject: selftests/bpf: Fix memory leaks in btf_type_c_dump() helper +Patch-mainline: v5.17-rc1 +Git-commit: 8ba285874913da21ca39a46376e9cc5ce0f45f94 +References: jsc#PED-1368 + +Free up memory and resources used by temporary allocated memstream and +btf_dump instance. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Reviewed-by: Hengqi Chen +Link: https://lore.kernel.org/bpf/20211107165521.9240-4-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/btf_helpers.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/bpf/btf_helpers.c ++++ b/tools/testing/selftests/bpf/btf_helpers.c +@@ -251,18 +251,23 @@ const char *btf_type_c_dump(const struct + d = btf_dump__new(btf, NULL, &opts, btf_dump_printf); + if (libbpf_get_error(d)) { + fprintf(stderr, "Failed to create btf_dump instance: %ld\n", libbpf_get_error(d)); +- return NULL; ++ goto err_out; + } + + for (i = 1; i < btf__type_cnt(btf); i++) { + err = btf_dump__dump_type(d, i); + if (err) { + fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err); +- return NULL; ++ goto err_out; + } + } + ++ btf_dump__free(d); + fflush(buf_file); + fclose(buf_file); + return buf; ++err_out: ++ btf_dump__free(d); ++ fclose(buf_file); ++ return NULL; + } diff --git a/patches.suse/selftests-bpf-Fix-misaligned-accesses-in-xdp-and-xdp.patch b/patches.suse/selftests-bpf-Fix-misaligned-accesses-in-xdp-and-xdp.patch new file mode 100644 index 0000000..cb18a3f --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-misaligned-accesses-in-xdp-and-xdp.patch @@ -0,0 +1,76 @@ +From: Andrii Nakryiko +Date: Tue, 23 Nov 2021 16:23:25 -0800 +Subject: selftests/bpf: Fix misaligned accesses in xdp and xdp_bpf2bpf tests +Patch-mainline: v5.17-rc1 +Git-commit: 8f6f41f39348f25db843f2fcb2f1c166b4bfa2d7 +References: jsc#PED-1368 + +Similar to previous patch, just copy over necessary struct into local +stack variable before checking its fields. 
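The pattern this patch and the queue_stack_map fix that follows apply, condensed into a hedged sketch (proto_of() is an invented name; headers and offsets mirror the diffs below): rather than aiming a struct pointer into a byte buffer whose alignment is unknown, copy the bytes into a naturally aligned local and read the fields from the copy.

#include <string.h>
#include <linux/if_ether.h>
#include <linux/ip.h>

/* buf holds a raw packet; the alignment of buf + ETH_HLEN is unknown */
static int proto_of(const char *buf)
{
	struct iphdr iph;	/* local copy is naturally aligned */

	/* unaligned-safe: memcpy performs byte-wise access */
	memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
	return iph.protocol;
}

/* The replaced pattern was effectively:
 *	struct iphdr *p = (void *)buf + sizeof(struct ethhdr);
 *	return p->protocol;	// sanitizers flag this as misaligned
 */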
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211124002325.1737739-14-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/xdp.c | 11 ++++++----- + tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c | 6 +++--- + 2 files changed, 9 insertions(+), 8 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/xdp.c ++++ b/tools/testing/selftests/bpf/prog_tests/xdp.c +@@ -11,8 +11,8 @@ void test_xdp(void) + const char *file = "./test_xdp.o"; + struct bpf_object *obj; + char buf[128]; +- struct ipv6hdr *iph6 = (void *)buf + sizeof(struct ethhdr); +- struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); ++ struct ipv6hdr iph6; ++ struct iphdr iph; + __u32 duration, retval, size; + int err, prog_fd, map_fd; + +@@ -28,16 +28,17 @@ void test_xdp(void) + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); +- ++ memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph)); + CHECK(err || retval != XDP_TX || size != 74 || +- iph->protocol != IPPROTO_IPIP, "ipv4", ++ iph.protocol != IPPROTO_IPIP, "ipv4", + "err %d errno %d retval %d size %d\n", + err, errno, retval, size); + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), + buf, &size, &retval, &duration); ++ memcpy(&iph6, buf + sizeof(struct ethhdr), sizeof(iph6)); + CHECK(err || retval != XDP_TX || size != 114 || +- iph6->nexthdr != IPPROTO_IPV6, "ipv6", ++ iph6.nexthdr != IPPROTO_IPV6, "ipv6", + "err %d errno %d retval %d size %d\n", + err, errno, retval, size); + out: +--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c ++++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +@@ -42,7 +42,7 @@ void test_xdp_bpf2bpf(void) + char buf[128]; + int err, pkt_fd, map_fd; + bool passed = false; +- struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); ++ struct iphdr iph; + struct iptnl_info value4 = {.family = AF_INET}; + struct test_xdp *pkt_skel = NULL; + struct test_xdp_bpf2bpf *ftrace_skel = NULL; +@@ -93,9 +93,9 @@ void test_xdp_bpf2bpf(void) + /* Run test program */ + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); +- ++ memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph)); + if (CHECK(err || retval != XDP_TX || size != 74 || +- iph->protocol != IPPROTO_IPIP, "ipv4", ++ iph.protocol != IPPROTO_IPIP, "ipv4", + "err %d errno %d retval %d size %d\n", + err, errno, retval, size)) + goto out; diff --git a/patches.suse/selftests-bpf-Fix-misaligned-memory-access-in-queue_.patch b/patches.suse/selftests-bpf-Fix-misaligned-memory-access-in-queue_.patch new file mode 100644 index 0000000..80876da --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-misaligned-memory-access-in-queue_.patch @@ -0,0 +1,50 @@ +From: Andrii Nakryiko +Date: Tue, 23 Nov 2021 16:23:22 -0800 +Subject: selftests/bpf: Fix misaligned memory access in queue_stack_map test +Patch-mainline: v5.17-rc1 +Git-commit: e2e0d90c550a2588ebed7aa2753adaac0f633989 +References: jsc#PED-1368 + +Copy over iphdr into a local variable before accessing its fields. 
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211124002325.1737739-11-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/queue_stack_map.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c ++++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c +@@ -14,7 +14,7 @@ static void test_queue_stack_map_by_type + int i, err, prog_fd, map_in_fd, map_out_fd; + char file[32], buf[128]; + struct bpf_object *obj; +- struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); ++ struct iphdr iph; + + /* Fill test values to be used */ + for (i = 0; i < MAP_SIZE; i++) +@@ -60,15 +60,17 @@ static void test_queue_stack_map_by_type + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); +- if (err || retval || size != sizeof(pkt_v4) || +- iph->daddr != val) ++ if (err || retval || size != sizeof(pkt_v4)) ++ break; ++ memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph)); ++ if (iph.daddr != val) + break; + } + +- CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val, ++ CHECK(err || retval || size != sizeof(pkt_v4) || iph.daddr != val, + "bpf_map_pop_elem", + "err %d errno %d retval %d size %d iph->daddr %u\n", +- err, errno, retval, size, iph->daddr); ++ err, errno, retval, size, iph.daddr); + + /* Queue is empty, program should return TC_ACT_SHOT */ + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), diff --git a/patches.suse/selftests-bpf-Fix-misaligned-memory-accesses-in-xdp_.patch b/patches.suse/selftests-bpf-Fix-misaligned-memory-accesses-in-xdp_.patch new file mode 100644 index 0000000..f4bb8c3 --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-misaligned-memory-accesses-in-xdp_.patch @@ -0,0 +1,79 @@ +From: Andrii Nakryiko +Date: Tue, 23 Nov 2021 16:23:24 -0800 +Subject: selftests/bpf: Fix misaligned memory accesses in xdp_bonding test +Patch-mainline: v5.17-rc1 +Git-commit: 43080b7106db5bcdb4f09c2648e968151e1461b7 +References: jsc#PED-1368 + +Construct packet buffer explicitly for each packet to avoid unaligned +memory accesses. 
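The same idea, inverted for the transmit side, sketched before the diff under stated assumptions (build_packet() is an invented name, field values arbitrary): fill in aligned local header structs, then serialize them into the wire buffer.

#include <string.h>
#include <stdint.h>
#include <netinet/in.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/udp.h>

static void build_packet(uint8_t buf[128])
{
	struct ethhdr eh = { .h_proto = htons(ETH_P_IP) };
	struct iphdr iph = { .ihl = 5, .version = 4, .protocol = IPPROTO_UDP };
	struct udphdr uh = { .dest = htons(9) };

	/* aligned locals in, possibly-unaligned buffer out */
	memcpy(buf, &eh, sizeof(eh));
	memcpy(buf + sizeof(eh), &iph, sizeof(iph));
	memcpy(buf + sizeof(eh) + sizeof(iph), &uh, sizeof(uh));
}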
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211124002325.1737739-13-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/xdp_bonding.c | 36 ++++++++++--------- + 1 file changed, 20 insertions(+), 16 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c ++++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +@@ -218,9 +218,9 @@ static int send_udp_packets(int vary_dst + .h_dest = BOND2_MAC, + .h_proto = htons(ETH_P_IP), + }; +- uint8_t buf[128] = {}; +- struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh)); +- struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph)); ++ struct iphdr iph = {}; ++ struct udphdr uh = {}; ++ uint8_t buf[128]; + int i, s = -1; + int ifindex; + +@@ -232,17 +232,16 @@ static int send_udp_packets(int vary_dst + if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex")) + goto err; + +- memcpy(buf, &eh, sizeof(eh)); +- iph->ihl = 5; +- iph->version = 4; +- iph->tos = 16; +- iph->id = 1; +- iph->ttl = 64; +- iph->protocol = IPPROTO_UDP; +- iph->saddr = 1; +- iph->daddr = 2; +- iph->tot_len = htons(sizeof(buf) - ETH_HLEN); +- iph->check = 0; ++ iph.ihl = 5; ++ iph.version = 4; ++ iph.tos = 16; ++ iph.id = 1; ++ iph.ttl = 64; ++ iph.protocol = IPPROTO_UDP; ++ iph.saddr = 1; ++ iph.daddr = 2; ++ iph.tot_len = htons(sizeof(buf) - ETH_HLEN); ++ iph.check = 0; + + for (i = 1; i <= NPACKETS; i++) { + int n; +@@ -253,10 +252,15 @@ static int send_udp_packets(int vary_dst + }; + + /* vary the UDP destination port for even distribution with roundrobin/xor modes */ +- uh->dest++; ++ uh.dest++; + + if (vary_dst_ip) +- iph->daddr++; ++ iph.daddr++; ++ ++ /* construct a packet */ ++ memcpy(buf, &eh, sizeof(eh)); ++ memcpy(buf + sizeof(eh), &iph, sizeof(iph)); ++ memcpy(buf + sizeof(eh) + sizeof(iph), &uh, sizeof(uh)); + + n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll)); + if (!ASSERT_EQ(n, sizeof(buf), "sendto")) diff --git a/patches.suse/selftests-bpf-Fix-non-strict-SEC-program-sections.patch b/patches.suse/selftests-bpf-Fix-non-strict-SEC-program-sections.patch new file mode 100644 index 0000000..274c42d --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-non-strict-SEC-program-sections.patch @@ -0,0 +1,101 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 15:08:41 -0700 +Subject: selftests/bpf: Fix non-strict SEC() program sections +Patch-mainline: v5.17-rc1 +Git-commit: 3d1d62397f4a12dedee09727b26cd5a4b254ebb7 +References: jsc#PED-1368 + +Fix few more SEC() definitions that were previously missed. 
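Context for the SEC() changes below, sketched rather than quoted: under libbpf's strict section handling, the section name is what identifies the program type, so free-form names like "l4lb-demo" can no longer be resolved, while canonical names ("tc", "cgroup/skb", ...) can.

/* Hedged BPF-side sketch; assumes the usual selftest includes
 * (<bpf/bpf_helpers.h> etc.), bodies trimmed to a no-op. */
SEC("l4lb-demo")			/* before: type cannot be inferred */
int balancer_ingress_old(struct __sk_buff *ctx) { return 0; }

SEC("tc")				/* after: maps to BPF_PROG_TYPE_SCHED_CLS */
int balancer_ingress(struct __sk_buff *ctx) { return 0; }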
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Dave Marchevsky +Link: https://lore.kernel.org/bpf/20211103220845.2676888-9-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/progs/test_l4lb.c | 2 +- + tools/testing/selftests/bpf/progs/test_l4lb_noinline.c | 2 +- + tools/testing/selftests/bpf/progs/test_map_lock.c | 2 +- + tools/testing/selftests/bpf/progs/test_queue_stack_map.h | 2 +- + tools/testing/selftests/bpf/progs/test_skb_ctx.c | 2 +- + tools/testing/selftests/bpf/progs/test_spin_lock.c | 2 +- + tools/testing/selftests/bpf/progs/test_tcp_estats.c | 2 +- + 7 files changed, 7 insertions(+), 7 deletions(-) + +--- a/tools/testing/selftests/bpf/progs/test_l4lb.c ++++ b/tools/testing/selftests/bpf/progs/test_l4lb.c +@@ -448,7 +448,7 @@ static __always_inline int process_packe + return bpf_redirect(ifindex, 0); + } + +-SEC("l4lb-demo") ++SEC("tc") + int balancer_ingress(struct __sk_buff *ctx) + { + void *data_end = (void *)(long)ctx->data_end; +--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c ++++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c +@@ -447,7 +447,7 @@ static __noinline int process_packet(voi + return bpf_redirect(ifindex, 0); + } + +-SEC("l4lb-demo") ++SEC("tc") + int balancer_ingress(struct __sk_buff *ctx) + { + void *data_end = (void *)(long)ctx->data_end; +--- a/tools/testing/selftests/bpf/progs/test_map_lock.c ++++ b/tools/testing/selftests/bpf/progs/test_map_lock.c +@@ -30,7 +30,7 @@ struct { + __type(value, struct array_elem); + } array_map SEC(".maps"); + +-SEC("map_lock_demo") ++SEC("cgroup/skb") + int bpf_map_lock_test(struct __sk_buff *skb) + { + struct hmap_elem zero = {}, *val; +--- a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h ++++ b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h +@@ -24,7 +24,7 @@ struct { + __uint(value_size, sizeof(__u32)); + } map_out SEC(".maps"); + +-SEC("test") ++SEC("tc") + int _test(struct __sk_buff *skb) + { + void *data_end = (void *)(long)skb->data_end; +--- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c ++++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c +@@ -5,7 +5,7 @@ + + char _license[] SEC("license") = "GPL"; + +-SEC("skb_ctx") ++SEC("tc") + int process(struct __sk_buff *skb) + { + #pragma clang loop unroll(full) +--- a/tools/testing/selftests/bpf/progs/test_spin_lock.c ++++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c +@@ -45,7 +45,7 @@ struct { + + #define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20) + +-SEC("spin_lock_demo") ++SEC("tc") + int bpf_sping_lock_test(struct __sk_buff *skb) + { + volatile int credit = 0, max_credit = 100, pkt_len = 64; +--- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c ++++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c +@@ -244,7 +244,7 @@ static __always_inline void send_basic_e + bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY); + } + +-SEC("dummy_tracepoint") ++SEC("tp/dummy/tracepoint") + int _dummy_tracepoint(struct dummy_tracepoint_args *arg) + { + if (!arg->sock) diff --git a/patches.suse/selftests-bpf-Fix-possible-NULL-passed-to-memcpy-wit.patch b/patches.suse/selftests-bpf-Fix-possible-NULL-passed-to-memcpy-wit.patch new file mode 100644 index 0000000..93d2464 --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-possible-NULL-passed-to-memcpy-wit.patch @@ -0,0 +1,30 @@ +From: Andrii Nakryiko +Date: Tue, 23 Nov 2021 16:23:20 -0800 +Subject: selftests/bpf: Fix possible NULL passed to memcpy() with zero size +Patch-mainline: v5.17-rc1 +Git-commit: 
3bd0233f388e061c44d36a1ac614a3bb4a851b7e +References: jsc#PED-1368 + +Prevent sanitizer from complaining about passing NULL into memcpy(), +even if it happens with zero size. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211124002325.1737739-9-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/core_reloc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c ++++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c +@@ -881,7 +881,8 @@ void test_core_reloc(void) + data = mmap_data; + + memset(mmap_data, 0, sizeof(*data)); +- memcpy(data->in, test_case->input, test_case->input_len); ++ if (test_case->input_len) ++ memcpy(data->in, test_case->input, test_case->input_len); + data->my_pid_tgid = my_pid_tgid; + + link = bpf_program__attach_raw_tracepoint(prog, tp_name); diff --git a/patches.suse/selftests-bpf-Fix-segfault-in-bpf_tcp_ca.patch b/patches.suse/selftests-bpf-Fix-segfault-in-bpf_tcp_ca.patch new file mode 100644 index 0000000..618f79f --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-segfault-in-bpf_tcp_ca.patch @@ -0,0 +1,51 @@ +From: Jean-Philippe Brucker +Date: Mon, 13 Dec 2021 18:30:59 +0000 +Subject: selftests/bpf: Fix segfault in bpf_tcp_ca +Patch-mainline: v5.17-rc1 +Git-commit: acd143eefb8270b8f50e2ce44e2fcdbbcf496a86 +References: jsc#PED-1368 + +Since commit ad9a7f96445b ("libbpf: Improve logging around BPF program +loading"), libbpf_debug_print() gets an additional prog_name parameter +but doesn't pass it to printf(). Since the format string now expects two +arguments, printf() may read uninitialized data and segfault. Pass +prog_name through. + +Fixes: ad9a7f96445b ("libbpf: Improve logging around BPF program loading") +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211213183058.346066-1-jean-philippe@linaro.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c ++++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +@@ -217,7 +217,7 @@ static bool found; + static int libbpf_debug_print(enum libbpf_print_level level, + const char *format, va_list args) + { +- const char *log_buf; ++ const char *prog_name, *log_buf; + + if (level != LIBBPF_WARN || + !strstr(format, "-- BEGIN PROG LOAD LOG --")) { +@@ -225,15 +225,14 @@ static int libbpf_debug_print(enum libbp + return 0; + } + +- /* skip prog_name */ +- va_arg(args, char *); ++ prog_name = va_arg(args, char *); + log_buf = va_arg(args, char *); + if (!log_buf) + goto out; + if (err_str && strstr(log_buf, err_str) != NULL) + found = true; + out: +- printf(format, log_buf); ++ printf(format, prog_name, log_buf); + return 0; + } + diff --git a/patches.suse/selftests-bpf-Fix-trivial-typo.patch b/patches.suse/selftests-bpf-Fix-trivial-typo.patch new file mode 100644 index 0000000..ba281e8 --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-trivial-typo.patch @@ -0,0 +1,29 @@ +From: Drew Fustini +Date: Sun, 21 Nov 2021 23:05:30 -0800 +Subject: selftests/bpf: Fix trivial typo +Patch-mainline: v5.17-rc1 +Git-commit: fa721d4f0b91f525339996f4faef7bb072d70162 +References: jsc#PED-1368 + +Fix trivial typo in comment from 'oveflow' to 'overflow'. + +Reported-by: Gustavo A. R. 
Silva +Signed-off-by: Drew Fustini +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211122070528.837806-1-dfustini@baylibre.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/btf_dump.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c +@@ -741,7 +741,7 @@ static void test_btf_dump_struct_data(st + /* overflow bpf_sock_ops struct with final element nonzero/zero. + * Regardless of the value of the final field, we don't have all the + * data we need to display it, so we should trigger an overflow. +- * In other words oveflow checking should trump "is field zero?" ++ * In other words overflow checking should trump "is field zero?" + * checks because if we've overflowed, it shouldn't matter what the + * field is - we can't trust its value so shouldn't display it. + */ diff --git a/patches.suse/selftests-bpf-Fix-xdpxceiver-failures-for-no-hugepag.patch b/patches.suse/selftests-bpf-Fix-xdpxceiver-failures-for-no-hugepag.patch new file mode 100644 index 0000000..c74a356 --- /dev/null +++ b/patches.suse/selftests-bpf-Fix-xdpxceiver-failures-for-no-hugepag.patch @@ -0,0 +1,49 @@ +From: Tirthendu Sarkar +Date: Wed, 17 Nov 2021 18:06:13 +0530 +Subject: selftests/bpf: Fix xdpxceiver failures for no hugepages +Patch-mainline: v5.17-rc1 +Git-commit: dd7f091fd22b1dce6c20e8f7769aa068ed88ac6d +References: jsc#PED-1368 + +xsk_configure_umem() needs hugepages to work in unaligned mode. So when +hugepages are not configured, 'unaligned' tests should be skipped which +is determined by the helper function hugepages_present(). This function +erroneously returns true with MAP_NORESERVE flag even when no hugepages +are configured. The removal of this flag fixes the issue. + +The test TEST_TYPE_UNALIGNED_INV_DESC also needs to be skipped when +there are no hugepages. However, this was not skipped as there was no +check for presence of hugepages and hence was failing. The check to skip +the test has now been added. 
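The probe after the fix, reduced to a hedged sketch (hugepages_present_sketch() is an invented name): without MAP_NORESERVE the kernel must reserve real huge pages at mmap() time, so the call fails immediately, instead of succeeding and faulting later, when none are configured.

#include <stdbool.h>
#include <stddef.h>
#include <sys/mman.h>

static bool hugepages_present_sketch(size_t sz)
{
	void *p = mmap(NULL, sz, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

	if (p == MAP_FAILED)
		return false;	/* no huge pages available to reserve */
	munmap(p, sz);
	return true;
}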
+ +Fixes: a4ba98dd0c69 (selftests: xsk: Add test for unaligned mode) +Signed-off-by: Tirthendu Sarkar +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211117123613.22288-1-tirthendu.sarkar@intel.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/xdpxceiver.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/tools/testing/selftests/bpf/xdpxceiver.c ++++ b/tools/testing/selftests/bpf/xdpxceiver.c +@@ -1217,7 +1217,7 @@ static bool hugepages_present(struct ifo + void *bufs; + + bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, +- MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_HUGETLB, -1, 0); ++ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); + if (bufs == MAP_FAILED) + return false; + +@@ -1364,6 +1364,10 @@ static void run_pkt_test(struct test_spe + testapp_invalid_desc(test); + break; + case TEST_TYPE_UNALIGNED_INV_DESC: ++ if (!hugepages_present(test->ifobj_tx)) { ++ ksft_test_result_skip("No 2M huge pages present.\n"); ++ return; ++ } + test_spec_set_name(test, "UNALIGNED_INV_DESC"); + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; diff --git a/patches.suse/selftests-bpf-Free-inner-strings-index-in-btf-selfte.patch b/patches.suse/selftests-bpf-Free-inner-strings-index-in-btf-selfte.patch new file mode 100644 index 0000000..f0930cc --- /dev/null +++ b/patches.suse/selftests-bpf-Free-inner-strings-index-in-btf-selfte.patch @@ -0,0 +1,35 @@ +From: Andrii Nakryiko +Date: Sun, 7 Nov 2021 08:55:17 -0800 +Subject: selftests/bpf: Free inner strings index in btf selftest +Patch-mainline: v5.17-rc1 +Git-commit: 5309b516bcc6f76dda0e44a7a1824324277093d6 +References: jsc#PED-1368 + +Inner array of allocated strings wasn't freed on success. Now it's +always freed. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Reviewed-by: Hengqi Chen +Link: https://lore.kernel.org/bpf/20211107165521.9240-6-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/btf.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/btf.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf.c +@@ -4046,11 +4046,9 @@ static void *btf_raw_create(const struct + next_str_idx < strs_cnt ? strs_idx[next_str_idx] : NULL; + + done: ++ free(strs_idx); + if (err) { +- if (raw_btf) +- free(raw_btf); +- if (strs_idx) +- free(strs_idx); ++ free(raw_btf); + return NULL; + } + return raw_btf; diff --git a/patches.suse/selftests-bpf-Free-per-cpu-values-array-in-bpf_iter-.patch b/patches.suse/selftests-bpf-Free-per-cpu-values-array-in-bpf_iter-.patch new file mode 100644 index 0000000..40fc3ba --- /dev/null +++ b/patches.suse/selftests-bpf-Free-per-cpu-values-array-in-bpf_iter-.patch @@ -0,0 +1,67 @@ +From: Andrii Nakryiko +Date: Sun, 7 Nov 2021 08:55:16 -0800 +Subject: selftests/bpf: Free per-cpu values array in bpf_iter selftest +Patch-mainline: v5.17-rc1 +Git-commit: b8b26e585f3a0fbcee1032c622f046787da57390 +References: jsc#PED-1368 + +Array holding per-cpu values wasn't freed. Fix that. 
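Both leak fixes above share one shape: an allocation made alongside skeleton setup must be released on the common exit path. A condensed, hedged sketch using names from the diff below (skeleton functions come from the generated .skel.h, bpf_num_possible_cpus() from the selftests' bpf_util.h; control flow simplified):

static void test_sketch(void)
{
	struct bpf_iter_bpf_percpu_hash_map *skel;
	void *val;

	skel = bpf_iter_bpf_percpu_hash_map__open();
	if (!skel)
		return;

	skel->rodata->num_cpus = bpf_num_possible_cpus();
	val = malloc(8 * bpf_num_possible_cpus());	/* 8 bytes per CPU */

	/* ... load, attach, iterate, check ... */

	bpf_iter_bpf_percpu_hash_map__destroy(skel);
	free(val);	/* the release the fix adds on the common exit path */
}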
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211107165521.9240-5-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c ++++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +@@ -699,14 +699,13 @@ static void test_bpf_percpu_hash_map(voi + char buf[64]; + void *val; + +- val = malloc(8 * bpf_num_possible_cpus()); +- + skel = bpf_iter_bpf_percpu_hash_map__open(); + if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open", + "skeleton open failed\n")) + return; + + skel->rodata->num_cpus = bpf_num_possible_cpus(); ++ val = malloc(8 * bpf_num_possible_cpus()); + + err = bpf_iter_bpf_percpu_hash_map__load(skel); + if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__load", +@@ -770,6 +769,7 @@ free_link: + bpf_link__destroy(link); + out: + bpf_iter_bpf_percpu_hash_map__destroy(skel); ++ free(val); + } + + static void test_bpf_array_map(void) +@@ -870,14 +870,13 @@ static void test_bpf_percpu_array_map(vo + void *val; + int len; + +- val = malloc(8 * bpf_num_possible_cpus()); +- + skel = bpf_iter_bpf_percpu_array_map__open(); + if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open", + "skeleton open failed\n")) + return; + + skel->rodata->num_cpus = bpf_num_possible_cpus(); ++ val = malloc(8 * bpf_num_possible_cpus()); + + err = bpf_iter_bpf_percpu_array_map__load(skel); + if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__load", +@@ -933,6 +932,7 @@ free_link: + bpf_link__destroy(link); + out: + bpf_iter_bpf_percpu_array_map__destroy(skel); ++ free(val); + } + + /* An iterator program deletes all local storage in a map. */ diff --git a/patches.suse/selftests-bpf-Improve-inner_map-test-coverage.patch b/patches.suse/selftests-bpf-Improve-inner_map-test-coverage.patch new file mode 100644 index 0000000..a677fa2 --- /dev/null +++ b/patches.suse/selftests-bpf-Improve-inner_map-test-coverage.patch @@ -0,0 +1,70 @@ +From: Alexei Starovoitov +Date: Wed, 1 Dec 2021 10:10:36 -0800 +Subject: selftests/bpf: Improve inner_map test coverage. +Patch-mainline: v5.17-rc1 +Git-commit: d82fa9b708d7d8a9c275d86c4388d24ecc63206c +References: jsc#PED-1368 + +Check that hash and array inner maps are properly initialized. 
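For reference, a hedged sketch of the BPF-side map-in-map declaration the check exercises; the outer map type is an assumption (the diff below does not show that line), and the __array(values, ...) member supplies the inner-map template whose max_entries the test now verifies at runtime:

/* Hedged sketch; assumes <bpf/bpf_helpers.h> macros and
 * BPF_MAP_TYPE_ARRAY_OF_MAPS as the outer type. */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, __u32);
	__array(values, struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(max_entries, 1234);	/* INNER_MAX_ENTRIES in the patch */
		__type(key, __u32);
		__type(value, __u32);
	});
} m_array_of_maps SEC(".maps");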
+ +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-14-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/progs/map_ptr_kern.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c ++++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c +@@ -334,9 +334,11 @@ static inline int check_lpm_trie(void) + return 1; + } + ++#define INNER_MAX_ENTRIES 1234 ++ + struct inner_map { + __uint(type, BPF_MAP_TYPE_ARRAY); +- __uint(max_entries, 1); ++ __uint(max_entries, INNER_MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); + } inner_map SEC(".maps"); +@@ -348,7 +350,7 @@ struct { + __type(value, __u32); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_ARRAY); +- __uint(max_entries, 1); ++ __uint(max_entries, INNER_MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); + }); +@@ -360,8 +362,13 @@ static inline int check_array_of_maps(vo + { + struct bpf_array *array_of_maps = (struct bpf_array *)&m_array_of_maps; + struct bpf_map *map = (struct bpf_map *)&m_array_of_maps; ++ struct bpf_array *inner_map; ++ int key = 0; + + VERIFY(check_default(&array_of_maps->map, map)); ++ inner_map = bpf_map_lookup_elem(array_of_maps, &key); ++ VERIFY(inner_map != 0); ++ VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES); + + return 1; + } +@@ -382,8 +389,13 @@ static inline int check_hash_of_maps(voi + { + struct bpf_htab *hash_of_maps = (struct bpf_htab *)&m_hash_of_maps; + struct bpf_map *map = (struct bpf_map *)&m_hash_of_maps; ++ struct bpf_htab *inner_map; ++ int key = 2; + + VERIFY(check_default(&hash_of_maps->map, map)); ++ inner_map = bpf_map_lookup_elem(hash_of_maps, &key); ++ VERIFY(inner_map != 0); ++ VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES); + + return 1; + } diff --git a/patches.suse/selftests-bpf-Mark-variable-as-static.patch b/patches.suse/selftests-bpf-Mark-variable-as-static.patch new file mode 100644 index 0000000..e78b778 --- /dev/null +++ b/patches.suse/selftests-bpf-Mark-variable-as-static.patch @@ -0,0 +1,33 @@ +From: Yucong Sun +Date: Fri, 12 Nov 2021 11:25:34 -0800 +Subject: selftests/bpf: Mark variable as static +Patch-mainline: v5.17-rc1 +Git-commit: db813d7bd919c521b869d657dc4a2a2335974cc4 +References: jsc#PED-1368 + +Fix warnings from checkstyle.pl + +Signed-off-by: Yucong Sun +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211112192535.898352-4-fallentree@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/test_progs.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/bpf/test_progs.c ++++ b/tools/testing/selftests/bpf/test_progs.c +@@ -473,11 +473,11 @@ static struct prog_test_def prog_test_de + #include + #undef DEFINE_TEST + }; +-const int prog_test_cnt = ARRAY_SIZE(prog_test_defs); ++static const int prog_test_cnt = ARRAY_SIZE(prog_test_defs); + + const char *argp_program_version = "test_progs 0.1"; + const char *argp_program_bug_address = ""; +-const char argp_program_doc[] = "BPF selftests test runner"; ++static const char argp_program_doc[] = "BPF selftests test runner"; + + enum ARG_KEYS { + ARG_TEST_NUM = 'n', diff --git a/patches.suse/selftests-bpf-Measure-bpf_loop-verifier-performance.patch b/patches.suse/selftests-bpf-Measure-bpf_loop-verifier-performance.patch new file mode 100644 index 0000000..10aeeb9 --- /dev/null +++ 
b/patches.suse/selftests-bpf-Measure-bpf_loop-verifier-performance.patch @@ -0,0 +1,322 @@ +From: Joanne Koong +Date: Mon, 29 Nov 2021 19:06:21 -0800 +Subject: selftests/bpf: Measure bpf_loop verifier performance +Patch-mainline: v5.17-rc1 +Git-commit: f6e659b7f97c76d0471d12bf274ea2a097cf3c5c +References: jsc#PED-1368 + +This patch tests bpf_loop in pyperf and strobemeta, and measures the +verifier performance of replacing the traditional for loop +with bpf_loop. + +The results are as follows: + +~strobemeta~ + +Baseline + verification time 6808200 usec + stack depth 496 + processed 554252 insns (limit 1000000) max_states_per_insn 16 + total_states 15878 peak_states 13489 mark_read 3110 + #192 verif_scale_strobemeta:OK (unrolled loop) + +Using bpf_loop + verification time 31589 usec + stack depth 96+400 + processed 1513 insns (limit 1000000) max_states_per_insn 2 + total_states 106 peak_states 106 mark_read 60 + #193 verif_scale_strobemeta_bpf_loop:OK + +~pyperf600~ + +Baseline + verification time 29702486 usec + stack depth 368 + processed 626838 insns (limit 1000000) max_states_per_insn 7 + total_states 30368 peak_states 30279 mark_read 748 + #182 verif_scale_pyperf600:OK (unrolled loop) + +Using bpf_loop + verification time 148488 usec + stack depth 320+40 + processed 10518 insns (limit 1000000) max_states_per_insn 10 + total_states 705 peak_states 517 mark_read 38 + #183 verif_scale_pyperf600_bpf_loop:OK + +Using the bpf_loop helper led to approximately a 99% decrease +in the verification time and in the number of instructions. + +Signed-off-by: Joanne Koong +Signed-off-by: Alexei Starovoitov +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211130030622.4131246-4-joannekoong@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c | 12 ++ + tools/testing/selftests/bpf/progs/pyperf.h | 71 ++++++++++++++ + tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c | 6 + + tools/testing/selftests/bpf/progs/strobemeta.h | 75 ++++++++++++++- + tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c | 9 + + 5 files changed, 169 insertions(+), 4 deletions(-) + create mode 100644 tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c + create mode 100644 tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c + +--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c ++++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +@@ -115,6 +115,12 @@ void test_verif_scale_pyperf600() + scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); + } + ++void test_verif_scale_pyperf600_bpf_loop(void) ++{ ++ /* use the bpf_loop helper*/ ++ scale_test("pyperf600_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); ++} ++ + void test_verif_scale_pyperf600_nounroll() + { + /* no unroll at all. 
+@@ -165,6 +171,12 @@ void test_verif_scale_strobemeta() + scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); + } + ++void test_verif_scale_strobemeta_bpf_loop(void) ++{ ++ /* use the bpf_loop helper*/ ++ scale_test("strobemeta_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); ++} ++ + void test_verif_scale_strobemeta_nounroll1() + { + /* no unroll, tiny loops */ +--- a/tools/testing/selftests/bpf/progs/pyperf.h ++++ b/tools/testing/selftests/bpf/progs/pyperf.h +@@ -159,6 +159,59 @@ struct { + __uint(value_size, sizeof(long long) * 127); + } stackmap SEC(".maps"); + ++#ifdef USE_BPF_LOOP ++struct process_frame_ctx { ++ int cur_cpu; ++ int32_t *symbol_counter; ++ void *frame_ptr; ++ FrameData *frame; ++ PidData *pidData; ++ Symbol *sym; ++ Event *event; ++ bool done; ++}; ++ ++#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) ++ ++static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx) ++{ ++ int zero = 0; ++ void *frame_ptr = ctx->frame_ptr; ++ PidData *pidData = ctx->pidData; ++ FrameData *frame = ctx->frame; ++ int32_t *symbol_counter = ctx->symbol_counter; ++ int cur_cpu = ctx->cur_cpu; ++ Event *event = ctx->event; ++ Symbol *sym = ctx->sym; ++ ++ if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) { ++ int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu; ++ int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym); ++ ++ if (!symbol_id) { ++ bpf_map_update_elem(&symbolmap, sym, &zero, 0); ++ symbol_id = bpf_map_lookup_elem(&symbolmap, sym); ++ if (!symbol_id) { ++ ctx->done = true; ++ return 1; ++ } ++ } ++ if (*symbol_id == new_symbol_id) ++ (*symbol_counter)++; ++ ++ barrier_var(i); ++ if (i >= STACK_MAX_LEN) ++ return 1; ++ ++ event->stack[i] = *symbol_id; ++ ++ event->stack_len = i + 1; ++ frame_ptr = frame->f_back; ++ } ++ return 0; ++} ++#endif /* USE_BPF_LOOP */ ++ + #ifdef GLOBAL_FUNC + __noinline + #elif defined(SUBPROGS) +@@ -228,11 +281,26 @@ int __on_event(struct bpf_raw_tracepoint + int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym); + if (symbol_counter == NULL) + return 0; ++#ifdef USE_BPF_LOOP ++ struct process_frame_ctx ctx = { ++ .cur_cpu = cur_cpu, ++ .symbol_counter = symbol_counter, ++ .frame_ptr = frame_ptr, ++ .frame = &frame, ++ .pidData = pidData, ++ .sym = &sym, ++ .event = event, ++ }; ++ ++ bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0); ++ if (ctx.done) ++ return 0; ++#else + #ifdef NO_UNROLL + #pragma clang loop unroll(disable) + #else + #pragma clang loop unroll(full) +-#endif ++#endif /* NO_UNROLL */ + /* Unwind python stack */ + for (int i = 0; i < STACK_MAX_LEN; ++i) { + if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) { +@@ -251,6 +319,7 @@ int __on_event(struct bpf_raw_tracepoint + frame_ptr = frame.f_back; + } + } ++#endif /* USE_BPF_LOOP */ + event->stack_complete = frame_ptr == NULL; + } else { + event->stack_complete = 1; +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c +@@ -0,0 +1,6 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Facebook */ ++ ++#define STACK_MAX_LEN 600 ++#define USE_BPF_LOOP ++#include "pyperf.h" +--- a/tools/testing/selftests/bpf/progs/strobemeta.h ++++ b/tools/testing/selftests/bpf/progs/strobemeta.h +@@ -445,6 +445,48 @@ static __always_inline void *read_map_va + return payload; + } + ++#ifdef USE_BPF_LOOP ++enum read_type { ++ READ_INT_VAR, ++ READ_MAP_VAR, ++ READ_STR_VAR, ++}; ++ ++struct read_var_ctx { ++ struct strobemeta_payload *data; ++ void *tls_base; ++ struct 
strobemeta_cfg *cfg; ++ void *payload; ++ /* value gets mutated */ ++ struct strobe_value_generic *value; ++ enum read_type type; ++}; ++ ++static int read_var_callback(__u32 index, struct read_var_ctx *ctx) ++{ ++ switch (ctx->type) { ++ case READ_INT_VAR: ++ if (index >= STROBE_MAX_INTS) ++ return 1; ++ read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data); ++ break; ++ case READ_MAP_VAR: ++ if (index >= STROBE_MAX_MAPS) ++ return 1; ++ ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base, ++ ctx->value, ctx->data, ctx->payload); ++ break; ++ case READ_STR_VAR: ++ if (index >= STROBE_MAX_STRS) ++ return 1; ++ ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base, ++ ctx->value, ctx->data, ctx->payload); ++ break; ++ } ++ return 0; ++} ++#endif /* USE_BPF_LOOP */ ++ + /* + * read_strobe_meta returns NULL, if no metadata was read; otherwise returns + * pointer to *right after* payload ends +@@ -475,11 +517,36 @@ static void *read_strobe_meta(struct tas + */ + tls_base = (void *)task; + ++#ifdef USE_BPF_LOOP ++ struct read_var_ctx ctx = { ++ .cfg = cfg, ++ .tls_base = tls_base, ++ .value = &value, ++ .data = data, ++ .payload = payload, ++ }; ++ int err; ++ ++ ctx.type = READ_INT_VAR; ++ err = bpf_loop(STROBE_MAX_INTS, read_var_callback, &ctx, 0); ++ if (err != STROBE_MAX_INTS) ++ return NULL; ++ ++ ctx.type = READ_STR_VAR; ++ err = bpf_loop(STROBE_MAX_STRS, read_var_callback, &ctx, 0); ++ if (err != STROBE_MAX_STRS) ++ return NULL; ++ ++ ctx.type = READ_MAP_VAR; ++ err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0); ++ if (err != STROBE_MAX_MAPS) ++ return NULL; ++#else + #ifdef NO_UNROLL + #pragma clang loop unroll(disable) + #else + #pragma unroll +-#endif ++#endif /* NO_UNROLL */ + for (int i = 0; i < STROBE_MAX_INTS; ++i) { + read_int_var(cfg, i, tls_base, &value, data); + } +@@ -487,7 +554,7 @@ static void *read_strobe_meta(struct tas + #pragma clang loop unroll(disable) + #else + #pragma unroll +-#endif ++#endif /* NO_UNROLL */ + for (int i = 0; i < STROBE_MAX_STRS; ++i) { + payload += read_str_var(cfg, i, tls_base, &value, data, payload); + } +@@ -495,10 +562,12 @@ static void *read_strobe_meta(struct tas + #pragma clang loop unroll(disable) + #else + #pragma unroll +-#endif ++#endif /* NO_UNROLL */ + for (int i = 0; i < STROBE_MAX_MAPS; ++i) { + payload = read_map_var(cfg, i, tls_base, &value, data, payload); + } ++#endif /* USE_BPF_LOOP */ ++ + /* + * return pointer right after end of payload, so it's possible to + * calculate exact amount of useful data that needs to be sent +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c +@@ -0,0 +1,9 @@ ++// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++/* Copyright (c) 2021 Facebook */ ++ ++#define STROBE_MAX_INTS 2 ++#define STROBE_MAX_STRS 25 ++#define STROBE_MAX_MAPS 100 ++#define STROBE_MAX_MAP_ENTRIES 20 ++#define USE_BPF_LOOP ++#include "strobemeta.h" diff --git a/patches.suse/selftests-bpf-Merge-test_stub.c-into-testing_helpers.patch b/patches.suse/selftests-bpf-Merge-test_stub.c-into-testing_helpers.patch new file mode 100644 index 0000000..f7ef853 --- /dev/null +++ b/patches.suse/selftests-bpf-Merge-test_stub.c-into-testing_helpers.patch @@ -0,0 +1,201 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 15:08:43 -0700 +Subject: selftests/bpf: Merge test_stub.c into testing_helpers.c +Patch-mainline: v5.17-rc1 +Git-commit: f87c1930ac2951d7fb3bacb523c24046c81015ed +References: jsc#PED-1368 + +Move testing prog and object load wrappers (bpf_prog_test_load and 
+bpf_test_load_program) into testing_helpers.{c,h} and get rid of +otherwise useless test_stub.c. Make testing_helpers.c available to +non-test_progs binaries as well. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Dave Marchevsky +Link: https://lore.kernel.org/bpf/20211103220845.2676888-11-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/Makefile | 31 +++++++------- + tools/testing/selftests/bpf/test_stub.c | 44 -------------------- + tools/testing/selftests/bpf/testing_helpers.c | 55 ++++++++++++++++++++++++++ + tools/testing/selftests/bpf/testing_helpers.h | 6 ++ + 4 files changed, 77 insertions(+), 59 deletions(-) + delete mode 100644 tools/testing/selftests/bpf/test_stub.c + +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -178,10 +178,6 @@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) + $(Q)$(MAKE) $(submake_extras) -C bpf_testmod + $(Q)cp bpf_testmod/bpf_testmod.ko $@ + +-$(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) +- $(call msg,CC,,$@) +- $(Q)$(CC) -c $(CFLAGS) -o $@ $< +- + DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool + + $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) +@@ -194,18 +190,23 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFA + + TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL) + +-$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ) ++$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ) + +-$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c +-$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c +-$(OUTPUT)/test_sock: cgroup_helpers.c +-$(OUTPUT)/test_sock_addr: cgroup_helpers.c +-$(OUTPUT)/test_sockmap: cgroup_helpers.c +-$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c +-$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c +-$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c +-$(OUTPUT)/test_sock_fields: cgroup_helpers.c +-$(OUTPUT)/test_sysctl: cgroup_helpers.c ++$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c testing_helpers.o ++$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c testing_helpers.o ++$(OUTPUT)/test_sock: cgroup_helpers.c testing_helpers.o ++$(OUTPUT)/test_sock_addr: cgroup_helpers.c testing_helpers.o ++$(OUTPUT)/test_sockmap: cgroup_helpers.c testing_helpers.o ++$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c testing_helpers.o ++$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c testing_helpers.o ++$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c testing_helpers.o ++$(OUTPUT)/test_sock_fields: cgroup_helpers.c testing_helpers.o ++$(OUTPUT)/test_sysctl: cgroup_helpers.c testing_helpers.o ++$(OUTPUT)/test_tag: testing_helpers.o ++$(OUTPUT)/test_lirc_mode2_user: testing_helpers.o ++$(OUTPUT)/xdping: testing_helpers.o ++$(OUTPUT)/flow_dissector_load: testing_helpers.o ++$(OUTPUT)/test_maps: testing_helpers.o + + BPFTOOL ?= $(DEFAULT_BPFTOOL) + $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ +--- a/tools/testing/selftests/bpf/test_stub.c ++++ /dev/null +@@ -1,44 +0,0 @@ +-// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +-/* Copyright (C) 2019 Netronome Systems, Inc. 
*/ +- +-#include +-#include +-#include +- +-int extra_prog_load_log_flags = 0; +- +-int bpf_prog_test_load(const char *file, enum bpf_prog_type type, +- struct bpf_object **pobj, int *prog_fd) +-{ +- struct bpf_prog_load_attr attr; +- +- memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); +- attr.file = file; +- attr.prog_type = type; +- attr.expected_attach_type = 0; +- attr.prog_flags = BPF_F_TEST_RND_HI32; +- attr.log_level = extra_prog_load_log_flags; +- +- return bpf_prog_load_xattr(&attr, pobj, prog_fd); +-} +- +-int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, +- size_t insns_cnt, const char *license, +- __u32 kern_version, char *log_buf, +- size_t log_buf_sz) +-{ +- struct bpf_load_program_attr load_attr; +- +- memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); +- load_attr.prog_type = type; +- load_attr.expected_attach_type = 0; +- load_attr.name = NULL; +- load_attr.insns = insns; +- load_attr.insns_cnt = insns_cnt; +- load_attr.license = license; +- load_attr.kern_version = kern_version; +- load_attr.prog_flags = BPF_F_TEST_RND_HI32; +- load_attr.log_level = extra_prog_load_log_flags; +- +- return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz); +-} +--- a/tools/testing/selftests/bpf/testing_helpers.c ++++ b/tools/testing/selftests/bpf/testing_helpers.c +@@ -1,7 +1,11 @@ + // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) ++/* Copyright (C) 2019 Netronome Systems, Inc. */ + /* Copyright (C) 2020 Facebook, Inc. */ + #include ++#include + #include ++#include ++#include + #include "testing_helpers.h" + + int parse_num_list(const char *s, bool **num_set, int *num_set_len) +@@ -78,3 +82,54 @@ __u32 link_info_prog_id(const struct bpf + } + return info->prog_id; + } ++ ++int extra_prog_load_log_flags = 0; ++ ++int bpf_prog_test_load(const char *file, enum bpf_prog_type type, ++ struct bpf_object **pobj, int *prog_fd) ++{ ++ struct bpf_object *obj; ++ struct bpf_program *prog; ++ int err; ++ ++ obj = bpf_object__open(file); ++ if (!obj) ++ return -errno; ++ ++ prog = bpf_object__next_program(obj, NULL); ++ if (!prog) { ++ err = -ENOENT; ++ goto err_out; ++ } ++ ++ if (type != BPF_PROG_TYPE_UNSPEC) ++ bpf_program__set_type(prog, type); ++ ++ err = bpf_object__load(obj); ++ if (err) ++ goto err_out; ++ ++ *pobj = obj; ++ *prog_fd = bpf_program__fd(prog); ++ ++ return 0; ++err_out: ++ bpf_object__close(obj); ++ return err; ++} ++ ++int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, ++ size_t insns_cnt, const char *license, ++ __u32 kern_version, char *log_buf, ++ size_t log_buf_sz) ++{ ++ LIBBPF_OPTS(bpf_prog_load_opts, opts, ++ .kern_version = kern_version, ++ .prog_flags = BPF_F_TEST_RND_HI32, ++ .log_level = extra_prog_load_log_flags, ++ .log_buf = log_buf, ++ .log_size = log_buf_sz, ++ ); ++ ++ return bpf_prog_load(type, NULL, license, insns, insns_cnt, &opts); ++} +--- a/tools/testing/selftests/bpf/testing_helpers.h ++++ b/tools/testing/selftests/bpf/testing_helpers.h +@@ -6,3 +6,9 @@ + + int parse_num_list(const char *s, bool **set, int *set_len); + __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info); ++int bpf_prog_test_load(const char *file, enum bpf_prog_type type, ++ struct bpf_object **pobj, int *prog_fd); ++int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, ++ size_t insns_cnt, const char *license, ++ __u32 kern_version, char *log_buf, ++ size_t log_buf_sz); diff --git 
a/patches.suse/selftests-bpf-Migrate-all-deprecated-perf_buffer-use.patch b/patches.suse/selftests-bpf-Migrate-all-deprecated-perf_buffer-use.patch new file mode 100644 index 0000000..7eba2a7 --- /dev/null +++ b/patches.suse/selftests-bpf-Migrate-all-deprecated-perf_buffer-use.patch @@ -0,0 +1,155 @@ +From: Andrii Nakryiko +Date: Wed, 10 Nov 2021 21:36:21 -0800 +Subject: selftests/bpf: Migrate all deprecated perf_buffer uses +Patch-mainline: v5.17-rc1 +Git-commit: 0b52a5f4b994c05070237271c7fac3265b640ffb +References: jsc#PED-1368 + +Migrate all old-style perf_buffer__new() and perf_buffer__new_raw() +calls to new v1.0+ variants. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211111053624.190580-7-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/benchs/bench_ringbufs.c | 8 ++------ + tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c | 5 ++--- + tools/testing/selftests/bpf/prog_tests/kfree_skb.c | 6 ++---- + tools/testing/selftests/bpf/prog_tests/perf_buffer.c | 6 ++---- + tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c | 7 ++----- + tools/testing/selftests/bpf/test_tcpnotify_user.c | 4 +--- + 6 files changed, 11 insertions(+), 25 deletions(-) + +--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c ++++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +@@ -394,11 +394,6 @@ static void perfbuf_libbpf_setup() + { + struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx; + struct perf_event_attr attr; +- struct perf_buffer_raw_opts pb_opts = { +- .event_cb = perfbuf_process_sample_raw, +- .ctx = (void *)(long)0, +- .attr = &attr, +- }; + struct bpf_link *link; + + ctx->skel = perfbuf_setup_skeleton(); +@@ -423,7 +418,8 @@ static void perfbuf_libbpf_setup() + } + + ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf), +- args.perfbuf_sz, &pb_opts); ++ args.perfbuf_sz, &attr, ++ perfbuf_process_sample_raw, NULL, NULL); + if (!ctx->perfbuf) { + fprintf(stderr, "failed to create perfbuf\n"); + exit(1); +--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c ++++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c +@@ -85,7 +85,6 @@ void test_get_stack_raw_tp(void) + const char *file_err = "./test_get_stack_rawtp_err.o"; + const char *prog_name = "raw_tracepoint/sys_enter"; + int i, err, prog_fd, exp_cnt = MAX_CNT_RAWTP; +- struct perf_buffer_opts pb_opts = {}; + struct perf_buffer *pb = NULL; + struct bpf_link *link = NULL; + struct timespec tv = {0, 10}; +@@ -124,8 +123,8 @@ void test_get_stack_raw_tp(void) + if (!ASSERT_OK_PTR(link, "attach_raw_tp")) + goto close_prog; + +- pb_opts.sample_cb = get_stack_print_output; +- pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts); ++ pb = perf_buffer__new(bpf_map__fd(map), 8, get_stack_print_output, ++ NULL, NULL, NULL); + if (!ASSERT_OK_PTR(pb, "perf_buf__new")) + goto close_prog; + +--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c ++++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +@@ -66,7 +66,6 @@ void serial_test_kfree_skb(void) + struct bpf_map *perf_buf_map, *global_data; + struct bpf_program *prog, *fentry, *fexit; + struct bpf_object *obj, *obj2 = NULL; +- struct perf_buffer_opts pb_opts = {}; + struct perf_buffer *pb = NULL; + int err, kfree_skb_fd; + bool passed = false; +@@ -112,9 +111,8 @@ void serial_test_kfree_skb(void) + goto close_prog; + + /* set up perf buffer */ +- pb_opts.sample_cb = on_sample; +- pb_opts.ctx = &passed; +- pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, 
&pb_opts); ++ pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, ++ on_sample, NULL, &passed, NULL); + if (!ASSERT_OK_PTR(pb, "perf_buf__new")) + goto close_prog; + +--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c ++++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +@@ -47,7 +47,6 @@ void serial_test_perf_buffer(void) + { + int err, on_len, nr_on_cpus = 0, nr_cpus, i, j; + int zero = 0, my_pid = getpid(); +- struct perf_buffer_opts pb_opts = {}; + struct test_perf_buffer *skel; + cpu_set_t cpu_seen; + struct perf_buffer *pb; +@@ -82,9 +81,8 @@ void serial_test_perf_buffer(void) + goto out_close; + + /* set up perf buffer */ +- pb_opts.sample_cb = on_sample; +- pb_opts.ctx = &cpu_seen; +- pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts); ++ pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, ++ on_sample, NULL, &cpu_seen, NULL); + if (!ASSERT_OK_PTR(pb, "perf_buf__new")) + goto out_close; + +--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c ++++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +@@ -49,7 +49,6 @@ void test_xdp_bpf2bpf(void) + struct vip key4 = {.protocol = 6, .family = AF_INET}; + struct bpf_program *prog; + struct perf_buffer *pb = NULL; +- struct perf_buffer_opts pb_opts = {}; + + /* Load XDP program to introspect */ + pkt_skel = test_xdp__open_and_load(); +@@ -86,10 +85,8 @@ void test_xdp_bpf2bpf(void) + goto out; + + /* Set up perf buffer */ +- pb_opts.sample_cb = on_sample; +- pb_opts.ctx = &passed; +- pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), +- 1, &pb_opts); ++ pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1, ++ on_sample, NULL, &passed, NULL); + if (!ASSERT_OK_PTR(pb, "perf_buf__new")) + goto out; + +--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c ++++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c +@@ -72,7 +72,6 @@ int main(int argc, char **argv) + { + const char *file = "test_tcpnotify_kern.o"; + struct bpf_map *perf_map, *global_map; +- struct perf_buffer_opts pb_opts = {}; + struct tcpnotify_globals g = {0}; + struct perf_buffer *pb = NULL; + const char *cg_path = "/foo"; +@@ -117,8 +116,7 @@ int main(int argc, char **argv) + return -1; + } + +- pb_opts.sample_cb = dummyfn; +- pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts); ++ pb = perf_buffer__new(bpf_map__fd(perf_map), 8, dummyfn, NULL, NULL, NULL); + if (!pb) + goto err; + diff --git a/patches.suse/selftests-bpf-Migrate-selftests-to-bpf_map_create.patch b/patches.suse/selftests-bpf-Migrate-selftests-to-bpf_map_create.patch new file mode 100644 index 0000000..66ff84f --- /dev/null +++ b/patches.suse/selftests-bpf-Migrate-selftests-to-bpf_map_create.patch @@ -0,0 +1,1225 @@ +From: Andrii Nakryiko +Date: Wed, 24 Nov 2021 11:32:33 -0800 +Subject: selftests/bpf: Migrate selftests to bpf_map_create() +Patch-mainline: v5.17-rc1 +Git-commit: 2fe256a429cb6c0b0064563af4158470143a363c +References: jsc#PED-1368 + +Conversion is straightforward for most cases. In a few cases tests are +using mutable map_flags and attribute structs, but bpf_map_create_opts +can be used in a similar fashion, so there were no problems. Just lots +of repetitive conversions.
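+
+For illustration, the typical shape of this conversion (a sketch only,
+with a made-up map name and sizes, not taken from the hunks below) is:
+
+	int fd;
+
+	/* old API: fill a bpf_create_map_attr struct (removed in libbpf 1.0) */
+	struct bpf_create_map_attr xattr = {
+		.name = "example_map",
+		.map_type = BPF_MAP_TYPE_HASH,
+		.key_size = sizeof(int),
+		.value_size = sizeof(long long),
+		.max_entries = 128,
+	};
+	fd = bpf_create_map_xattr(&xattr);
+
+	/* new API: the fixed fields become direct arguments; optional
+	 * attributes go through struct bpf_map_create_opts, or NULL when
+	 * none are needed
+	 */
+	fd = bpf_map_create(BPF_MAP_TYPE_HASH, "example_map",
+			    sizeof(int), sizeof(long long), 128, NULL);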
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211124193233.3115996-5-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + .../bpf/map_tests/array_map_batch_ops.c | 13 +-- + .../bpf/map_tests/htab_map_batch_ops.c | 13 +-- + .../bpf/map_tests/lpm_trie_map_batch_ops.c | 15 +-- + .../selftests/bpf/map_tests/sk_storage_map.c | 50 ++++---- + .../bpf/prog_tests/bloom_filter_map.c | 36 +++--- + .../selftests/bpf/prog_tests/bpf_iter.c | 8 +- + tools/testing/selftests/bpf/prog_tests/btf.c | 51 +++----- + .../bpf/prog_tests/cgroup_attach_multi.c | 12 +- + .../selftests/bpf/prog_tests/pinning.c | 4 +- + .../selftests/bpf/prog_tests/ringbuf_multi.c | 4 +- + .../bpf/prog_tests/select_reuseport.c | 21 +--- + .../selftests/bpf/prog_tests/sockmap_basic.c | 4 +- + .../selftests/bpf/prog_tests/sockmap_ktls.c | 2 +- + .../selftests/bpf/prog_tests/sockmap_listen.c | 4 +- + .../selftests/bpf/prog_tests/test_bpffs.c | 2 +- + .../selftests/bpf/test_cgroup_storage.c | 8 +- + tools/testing/selftests/bpf/test_lpm_map.c | 27 +++-- + tools/testing/selftests/bpf/test_lru_map.c | 16 +-- + tools/testing/selftests/bpf/test_maps.c | 110 +++++++++--------- + tools/testing/selftests/bpf/test_tag.c | 5 +- + tools/testing/selftests/bpf/test_verifier.c | 52 ++++----- + 21 files changed, 201 insertions(+), 256 deletions(-) + +diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c +index f4d870da7684..78c76496b14a 100644 +--- a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c ++++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c +@@ -68,13 +68,6 @@ static void map_batch_verify(int *visited, __u32 max_entries, int *keys, + + static void __test_map_lookup_and_update_batch(bool is_pcpu) + { +- struct bpf_create_map_attr xattr = { +- .name = "array_map", +- .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_ARRAY : +- BPF_MAP_TYPE_ARRAY, +- .key_size = sizeof(int), +- .value_size = sizeof(__s64), +- }; + int map_fd, *keys, *visited; + __u32 count, total, total_success; + const __u32 max_entries = 10; +@@ -86,10 +79,10 @@ static void __test_map_lookup_and_update_batch(bool is_pcpu) + .flags = 0, + ); + +- xattr.max_entries = max_entries; +- map_fd = bpf_create_map_xattr(&xattr); ++ map_fd = bpf_map_create(is_pcpu ? BPF_MAP_TYPE_PERCPU_ARRAY : BPF_MAP_TYPE_ARRAY, ++ "array_map", sizeof(int), sizeof(__s64), max_entries, NULL); + CHECK(map_fd == -1, +- "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); ++ "bpf_map_create()", "error:%s\n", strerror(errno)); + + value_size = sizeof(__s64); + if (is_pcpu) +diff --git a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c +index 976bf415fbdd..f807d53fd8dd 100644 +--- a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c ++++ b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c +@@ -83,22 +83,15 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu) + int err, step, value_size; + bool nospace_err; + void *values; +- struct bpf_create_map_attr xattr = { +- .name = "hash_map", +- .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH : +- BPF_MAP_TYPE_HASH, +- .key_size = sizeof(int), +- .value_size = sizeof(int), +- }; + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + +- xattr.max_entries = max_entries; +- map_fd = bpf_create_map_xattr(&xattr); ++ map_fd = bpf_map_create(is_pcpu ? 
BPF_MAP_TYPE_PERCPU_HASH : BPF_MAP_TYPE_HASH, ++ "hash_map", sizeof(int), sizeof(int), max_entries, NULL); + CHECK(map_fd == -1, +- "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); ++ "bpf_map_create()", "error:%s\n", strerror(errno)); + + value_size = is_pcpu ? sizeof(value) : sizeof(int); + keys = malloc(max_entries * sizeof(int)); +diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c +index 2e986e5e4cac..87d07b596e17 100644 +--- a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c ++++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c +@@ -64,13 +64,7 @@ static void map_batch_verify(int *visited, __u32 max_entries, + + void test_lpm_trie_map_batch_ops(void) + { +- struct bpf_create_map_attr xattr = { +- .name = "lpm_trie_map", +- .map_type = BPF_MAP_TYPE_LPM_TRIE, +- .key_size = sizeof(struct test_lpm_key), +- .value_size = sizeof(int), +- .map_flags = BPF_F_NO_PREALLOC, +- }; ++ LIBBPF_OPTS(bpf_map_create_opts, create_opts, .map_flags = BPF_F_NO_PREALLOC); + struct test_lpm_key *keys, key; + int map_fd, *values, *visited; + __u32 step, count, total, total_success; +@@ -82,9 +76,10 @@ void test_lpm_trie_map_batch_ops(void) + .flags = 0, + ); + +- xattr.max_entries = max_entries; +- map_fd = bpf_create_map_xattr(&xattr); +- CHECK(map_fd == -1, "bpf_create_map_xattr()", "error:%s\n", ++ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie_map", ++ sizeof(struct test_lpm_key), sizeof(int), ++ max_entries, &create_opts); ++ CHECK(map_fd == -1, "bpf_map_create()", "error:%s\n", + strerror(errno)); + + keys = malloc(max_entries * sizeof(struct test_lpm_key)); +diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c +index e569edc679d8..8eea4ffeb092 100644 +--- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c ++++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c +@@ -19,16 +19,12 @@ + #include + #include + +-static struct bpf_create_map_attr xattr = { +- .name = "sk_storage_map", +- .map_type = BPF_MAP_TYPE_SK_STORAGE, +- .map_flags = BPF_F_NO_PREALLOC, +- .max_entries = 0, +- .key_size = 4, +- .value_size = 8, ++static struct bpf_map_create_opts map_opts = { ++ .sz = sizeof(map_opts), + .btf_key_type_id = 1, + .btf_value_type_id = 3, + .btf_fd = -1, ++ .map_flags = BPF_F_NO_PREALLOC, + }; + + static unsigned int nr_sk_threads_done; +@@ -150,13 +146,13 @@ static int create_sk_storage_map(void) + btf_fd = load_btf(); + CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n", + btf_fd, errno); +- xattr.btf_fd = btf_fd; ++ map_opts.btf_fd = btf_fd; + +- map_fd = bpf_create_map_xattr(&xattr); +- xattr.btf_fd = -1; ++ map_fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &map_opts); ++ map_opts.btf_fd = -1; + close(btf_fd); + CHECK(map_fd == -1, +- "bpf_create_map_xattr()", "errno:%d\n", errno); ++ "bpf_map_create()", "errno:%d\n", errno); + + return map_fd; + } +@@ -463,20 +459,20 @@ static void test_sk_storage_map_basic(void) + int cnt; + int lock; + } value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value; +- struct bpf_create_map_attr bad_xattr; ++ struct bpf_map_create_opts bad_xattr; + int btf_fd, map_fd, sk_fd, err; + + btf_fd = load_btf(); + CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n", + btf_fd, errno); +- xattr.btf_fd = btf_fd; ++ map_opts.btf_fd = btf_fd; + + sk_fd = socket(AF_INET6, SOCK_STREAM, 0); + CHECK(sk_fd == -1, "socket()", "sk_fd:%d 
errno:%d\n", + sk_fd, errno); + +- map_fd = bpf_create_map_xattr(&xattr); +- CHECK(map_fd == -1, "bpf_create_map_xattr(good_xattr)", ++ map_fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &map_opts); ++ CHECK(map_fd == -1, "bpf_map_create(good_xattr)", + "map_fd:%d errno:%d\n", map_fd, errno); + + /* Add new elem */ +@@ -560,31 +556,29 @@ static void test_sk_storage_map_basic(void) + CHECK(!err || errno != ENOENT, "bpf_map_delete_elem()", + "err:%d errno:%d\n", err, errno); + +- memcpy(&bad_xattr, &xattr, sizeof(xattr)); ++ memcpy(&bad_xattr, &map_opts, sizeof(map_opts)); + bad_xattr.btf_key_type_id = 0; +- err = bpf_create_map_xattr(&bad_xattr); +- CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", ++ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr); ++ CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)", + "err:%d errno:%d\n", err, errno); + +- memcpy(&bad_xattr, &xattr, sizeof(xattr)); ++ memcpy(&bad_xattr, &map_opts, sizeof(map_opts)); + bad_xattr.btf_key_type_id = 3; +- err = bpf_create_map_xattr(&bad_xattr); +- CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", ++ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr); ++ CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)", + "err:%d errno:%d\n", err, errno); + +- memcpy(&bad_xattr, &xattr, sizeof(xattr)); +- bad_xattr.max_entries = 1; +- err = bpf_create_map_xattr(&bad_xattr); +- CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", ++ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 1, &map_opts); ++ CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)", + "err:%d errno:%d\n", err, errno); + +- memcpy(&bad_xattr, &xattr, sizeof(xattr)); ++ memcpy(&bad_xattr, &map_opts, sizeof(map_opts)); + bad_xattr.map_flags = 0; +- err = bpf_create_map_xattr(&bad_xattr); ++ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr); + CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", + "err:%d errno:%d\n", err, errno); + +- xattr.btf_fd = -1; ++ map_opts.btf_fd = -1; + close(btf_fd); + close(map_fd); + close(sk_fd); +diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +index be73e3de6668..d2d9e965eba5 100644 +--- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c ++++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +@@ -7,32 +7,33 @@ + + static void test_fail_cases(void) + { ++ LIBBPF_OPTS(bpf_map_create_opts, opts); + __u32 value; + int fd, err; + + /* Invalid key size */ +- fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 4, sizeof(value), 100, 0); +- if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid key size")) ++ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 4, sizeof(value), 100, NULL); ++ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid key size")) + close(fd); + + /* Invalid value size */ +- fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, 0, 100, 0); +- if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid value size 0")) ++ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, 0, 100, NULL); ++ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value size 0")) + close(fd); + + /* Invalid max entries size */ +- fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 0, 0); +- if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid max entries size")) ++ fd = 
bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 0, NULL); ++ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid max entries size")) + close(fd); + + /* Bloom filter maps do not support BPF_F_NO_PREALLOC */ +- fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, +- BPF_F_NO_PREALLOC); +- if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid flags")) ++ opts.map_flags = BPF_F_NO_PREALLOC; ++ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts); ++ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid flags")) + close(fd); + +- fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, 0); +- if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter")) ++ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, NULL); ++ if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter")) + return; + + /* Test invalid flags */ +@@ -56,13 +57,14 @@ static void test_fail_cases(void) + + static void test_success_cases(void) + { ++ LIBBPF_OPTS(bpf_map_create_opts, opts); + char value[11]; + int fd, err; + + /* Create a map */ +- fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, +- BPF_F_ZERO_SEED | BPF_F_NUMA_NODE); +- if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter success case")) ++ opts.map_flags = BPF_F_ZERO_SEED | BPF_F_NUMA_NODE; ++ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts); ++ if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter success case")) + return; + + /* Add a value to the bloom filter */ +@@ -100,9 +102,9 @@ static void test_inner_map(struct bloom_filter_map *skel, const __u32 *rand_vals + struct bpf_link *link; + + /* Create a bloom filter map that will be used as the inner map */ +- inner_map_fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(*rand_vals), +- nr_rand_vals, 0); +- if (!ASSERT_GE(inner_map_fd, 0, "bpf_create_map bloom filter inner map")) ++ inner_map_fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(*rand_vals), ++ nr_rand_vals, NULL); ++ if (!ASSERT_GE(inner_map_fd, 0, "bpf_map_create bloom filter inner map")) + return; + + for (i = 0; i < nr_rand_vals; i++) { +diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +index 3e10abce3e5a..0b996be923b5 100644 +--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c ++++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +@@ -469,12 +469,12 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) + * fills seq_file buffer and then the other will trigger + * overflow and needs restart. 
+ */ +- map1_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); +- if (CHECK(map1_fd < 0, "bpf_create_map", ++ map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); ++ if (CHECK(map1_fd < 0, "bpf_map_create", + "map_creation failed: %s\n", strerror(errno))) + goto out; +- map2_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); +- if (CHECK(map2_fd < 0, "bpf_create_map", ++ map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); ++ if (CHECK(map2_fd < 0, "bpf_map_create", + "map_creation failed: %s\n", strerror(errno))) + goto free_map1; + +diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c +index f9326a13badb..cab810bab593 100644 +--- a/tools/testing/selftests/bpf/prog_tests/btf.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf.c +@@ -4074,7 +4074,7 @@ done: + static void do_test_raw(unsigned int test_num) + { + struct btf_raw_test *test = &raw_tests[test_num - 1]; +- struct bpf_create_map_attr create_attr = {}; ++ LIBBPF_OPTS(bpf_map_create_opts, opts); + int map_fd = -1, btf_fd = -1; + unsigned int raw_btf_size; + struct btf_header *hdr; +@@ -4117,16 +4117,11 @@ static void do_test_raw(unsigned int test_num) + if (err || btf_fd < 0) + goto done; + +- create_attr.name = test->map_name; +- create_attr.map_type = test->map_type; +- create_attr.key_size = test->key_size; +- create_attr.value_size = test->value_size; +- create_attr.max_entries = test->max_entries; +- create_attr.btf_fd = btf_fd; +- create_attr.btf_key_type_id = test->key_type_id; +- create_attr.btf_value_type_id = test->value_type_id; +- +- map_fd = bpf_create_map_xattr(&create_attr); ++ opts.btf_fd = btf_fd; ++ opts.btf_key_type_id = test->key_type_id; ++ opts.btf_value_type_id = test->value_type_id; ++ map_fd = bpf_map_create(test->map_type, test->map_name, ++ test->key_size, test->value_size, test->max_entries, &opts); + + err = ((map_fd < 0) != test->map_create_err); + CHECK(err, "map_fd:%d test->map_create_err:%u", +@@ -4290,7 +4285,7 @@ done: + static int test_btf_id(unsigned int test_num) + { + const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; +- struct bpf_create_map_attr create_attr = {}; ++ LIBBPF_OPTS(bpf_map_create_opts, opts); + uint8_t *raw_btf = NULL, *user_btf[2] = {}; + int btf_fd[2] = {-1, -1}, map_fd = -1; + struct bpf_map_info map_info = {}; +@@ -4355,16 +4350,11 @@ static int test_btf_id(unsigned int test_num) + } + + /* Test btf members in struct bpf_map_info */ +- create_attr.name = "test_btf_id"; +- create_attr.map_type = BPF_MAP_TYPE_ARRAY; +- create_attr.key_size = sizeof(int); +- create_attr.value_size = sizeof(unsigned int); +- create_attr.max_entries = 4; +- create_attr.btf_fd = btf_fd[0]; +- create_attr.btf_key_type_id = 1; +- create_attr.btf_value_type_id = 2; +- +- map_fd = bpf_create_map_xattr(&create_attr); ++ opts.btf_fd = btf_fd[0]; ++ opts.btf_key_type_id = 1; ++ opts.btf_value_type_id = 2; ++ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_btf_id", ++ sizeof(int), sizeof(int), 4, &opts); + if (CHECK(map_fd < 0, "errno:%d", errno)) { + err = -1; + goto done; +@@ -5153,7 +5143,7 @@ static void do_test_pprint(int test_num) + { + const struct btf_raw_test *test = &pprint_test_template[test_num]; + enum pprint_mapv_kind_t mapv_kind = test->mapv_kind; +- struct bpf_create_map_attr create_attr = {}; ++ LIBBPF_OPTS(bpf_map_create_opts, opts); + bool ordered_map, lossless_map, percpu_map; + int err, ret, num_cpus, rounded_value_size; + unsigned int key, nr_read_elems; +@@ -5189,16 +5179,11 @@ 
static void do_test_pprint(int test_num) + goto done; + } + +- create_attr.name = test->map_name; +- create_attr.map_type = test->map_type; +- create_attr.key_size = test->key_size; +- create_attr.value_size = test->value_size; +- create_attr.max_entries = test->max_entries; +- create_attr.btf_fd = btf_fd; +- create_attr.btf_key_type_id = test->key_type_id; +- create_attr.btf_value_type_id = test->value_type_id; +- +- map_fd = bpf_create_map_xattr(&create_attr); ++ opts.btf_fd = btf_fd; ++ opts.btf_key_type_id = test->key_type_id; ++ opts.btf_value_type_id = test->value_type_id; ++ map_fd = bpf_map_create(test->map_type, test->map_name, ++ test->key_size, test->value_size, test->max_entries, &opts); + if (CHECK(map_fd < 0, "errno:%d", errno)) { + err = -1; + goto done; +diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +index de9c3e12b0ea..d3e8f729c623 100644 +--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c ++++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +@@ -15,22 +15,22 @@ static int prog_load_cnt(int verdict, int val) + int cgroup_storage_fd, percpu_cgroup_storage_fd; + + if (map_fd < 0) +- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); ++ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); + if (map_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; + } + +- cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, +- sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); ++ cgroup_storage_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL, ++ sizeof(struct bpf_cgroup_storage_key), 8, 0, NULL); + if (cgroup_storage_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; + } + +- percpu_cgroup_storage_fd = bpf_create_map( +- BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, +- sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); ++ percpu_cgroup_storage_fd = bpf_map_create( ++ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, NULL, ++ sizeof(struct bpf_cgroup_storage_key), 8, 0, NULL); + if (percpu_cgroup_storage_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; +diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c +index d4b953ae3407..31c09ba577eb 100644 +--- a/tools/testing/selftests/bpf/prog_tests/pinning.c ++++ b/tools/testing/selftests/bpf/prog_tests/pinning.c +@@ -241,8 +241,8 @@ void test_pinning(void) + goto out; + } + +- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32), +- sizeof(__u64), 1, 0); ++ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(__u32), ++ sizeof(__u64), 1, NULL); + if (CHECK(map_fd < 0, "create pinmap manually", "fd %d\n", map_fd)) + goto out; + +diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +index 167cd8a2edfd..e945195b24c9 100644 +--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c ++++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +@@ -62,8 +62,8 @@ void test_ringbuf_multi(void) + if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) + goto cleanup; + +- proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0); +- if (CHECK(proto_fd < 0, "bpf_create_map", "bpf_create_map failed\n")) ++ proto_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, page_size, NULL); ++ if (CHECK(proto_fd < 0, "bpf_map_create", "bpf_map_create failed\n")) + 
goto cleanup; + + err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd); +diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +index 3cfc910ab3c1..980ac0f2c0bb 100644 +--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c ++++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +@@ -66,29 +66,20 @@ static union sa46 { + + static int create_maps(enum bpf_map_type inner_type) + { +- struct bpf_create_map_attr attr = {}; ++ LIBBPF_OPTS(bpf_map_create_opts, opts); + + inner_map_type = inner_type; + + /* Creating reuseport_array */ +- attr.name = "reuseport_array"; +- attr.map_type = inner_type; +- attr.key_size = sizeof(__u32); +- attr.value_size = sizeof(__u32); +- attr.max_entries = REUSEPORT_ARRAY_SIZE; +- +- reuseport_array = bpf_create_map_xattr(&attr); ++ reuseport_array = bpf_map_create(inner_type, "reuseport_array", ++ sizeof(__u32), sizeof(__u32), REUSEPORT_ARRAY_SIZE, NULL); + RET_ERR(reuseport_array < 0, "creating reuseport_array", + "reuseport_array:%d errno:%d\n", reuseport_array, errno); + + /* Creating outer_map */ +- attr.name = "outer_map"; +- attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS; +- attr.key_size = sizeof(__u32); +- attr.value_size = sizeof(__u32); +- attr.max_entries = 1; +- attr.inner_map_fd = reuseport_array; +- outer_map = bpf_create_map_xattr(&attr); ++ opts.inner_map_fd = reuseport_array; ++ outer_map = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, "outer_map", ++ sizeof(__u32), sizeof(__u32), 1, &opts); + RET_ERR(outer_map < 0, "creating outer_map", + "outer_map:%d errno:%d\n", outer_map, errno); + +diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +index 1352ec104149..85db0f4cdd95 100644 +--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c ++++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +@@ -91,9 +91,9 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type) + if (CHECK_FAIL(s < 0)) + return; + +- map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0); ++ map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL); + if (CHECK_FAIL(map < 0)) { +- perror("bpf_create_map"); ++ perror("bpf_cmap_create"); + goto out; + } + +diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +index 7a0d64fdc192..af293ea1542c 100644 +--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c ++++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +@@ -97,7 +97,7 @@ static void run_tests(int family, enum bpf_map_type map_type) + char test_name[MAX_TEST_NAME]; + int map; + +- map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0); ++ map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL); + if (CHECK_FAIL(map < 0)) { + perror("bpf_map_create"); + return; +diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +index 2a9cb951bfd6..7e21bfab6358 100644 +--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c ++++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +@@ -502,8 +502,8 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd) + if (s < 0) + return; + +- mapfd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key), +- sizeof(value32), 1, 0); ++ mapfd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(key), ++ sizeof(value32), 1, NULL); + if 
(mapfd < 0) { + FAIL_ERRNO("map_create"); + goto close; +diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c +index d29ebfeef9c5..ada95bfb9b1b 100644 +--- a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c ++++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c +@@ -80,7 +80,7 @@ static int fn(void) + if (!ASSERT_OK(err, "creating " TDIR "/fs1/b")) + goto out; + +- map = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 4, 1, 0); ++ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL); + if (!ASSERT_GT(map, 0, "create_map(ARRAY)")) + goto out; + err = bpf_obj_pin(map, TDIR "/fs1/c"); +diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c +index a63787e7bb1a..5b8314cd77fd 100644 +--- a/tools/testing/selftests/bpf/test_cgroup_storage.c ++++ b/tools/testing/selftests/bpf/test_cgroup_storage.c +@@ -51,15 +51,15 @@ int main(int argc, char **argv) + goto err; + } + +- map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key), +- sizeof(value), 0, 0); ++ map_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL, sizeof(key), ++ sizeof(value), 0, NULL); + if (map_fd < 0) { + printf("Failed to create map: %s\n", strerror(errno)); + goto out; + } + +- percpu_map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, +- sizeof(key), sizeof(value), 0, 0); ++ percpu_map_fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, NULL, ++ sizeof(key), sizeof(value), 0, NULL); + if (percpu_map_fd < 0) { + printf("Failed to create map: %s\n", strerror(errno)); + goto out; +diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c +index 006be3963977..baa3e3ecae82 100644 +--- a/tools/testing/selftests/bpf/test_lpm_map.c ++++ b/tools/testing/selftests/bpf/test_lpm_map.c +@@ -208,6 +208,7 @@ static void test_lpm_order(void) + + static void test_lpm_map(int keysize) + { ++ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); + size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups; + struct tlpm_node *t, *list = NULL; + struct bpf_lpm_trie_key *key; +@@ -233,11 +234,11 @@ static void test_lpm_map(int keysize) + key = alloca(sizeof(*key) + keysize); + memset(key, 0, sizeof(*key) + keysize); + +- map = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, ++ map = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, + sizeof(*key) + keysize, + keysize + 1, + 4096, +- BPF_F_NO_PREALLOC); ++ &opts); + assert(map >= 0); + + for (i = 0; i < n_nodes; ++i) { +@@ -329,6 +330,7 @@ static void test_lpm_map(int keysize) + + static void test_lpm_ipaddr(void) + { ++ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); + struct bpf_lpm_trie_key *key_ipv4; + struct bpf_lpm_trie_key *key_ipv6; + size_t key_size_ipv4; +@@ -342,14 +344,14 @@ static void test_lpm_ipaddr(void) + key_ipv4 = alloca(key_size_ipv4); + key_ipv6 = alloca(key_size_ipv6); + +- map_fd_ipv4 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, ++ map_fd_ipv4 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, + key_size_ipv4, sizeof(value), +- 100, BPF_F_NO_PREALLOC); ++ 100, &opts); + assert(map_fd_ipv4 >= 0); + +- map_fd_ipv6 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, ++ map_fd_ipv6 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, + key_size_ipv6, sizeof(value), +- 100, BPF_F_NO_PREALLOC); ++ 100, &opts); + assert(map_fd_ipv6 >= 0); + + /* Fill data some IPv4 and IPv6 address ranges */ +@@ -423,6 +425,7 @@ static void test_lpm_ipaddr(void) + + static void test_lpm_delete(void) + { 
++ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); + struct bpf_lpm_trie_key *key; + size_t key_size; + int map_fd; +@@ -431,9 +434,9 @@ static void test_lpm_delete(void) + key_size = sizeof(*key) + sizeof(__u32); + key = alloca(key_size); + +- map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, ++ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, + key_size, sizeof(value), +- 100, BPF_F_NO_PREALLOC); ++ 100, &opts); + assert(map_fd >= 0); + + /* Add nodes: +@@ -535,6 +538,7 @@ static void test_lpm_delete(void) + + static void test_lpm_get_next_key(void) + { ++ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); + struct bpf_lpm_trie_key *key_p, *next_key_p; + size_t key_size; + __u32 value = 0; +@@ -544,8 +548,7 @@ static void test_lpm_get_next_key(void) + key_p = alloca(key_size); + next_key_p = alloca(key_size); + +- map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value), +- 100, BPF_F_NO_PREALLOC); ++ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, sizeof(value), 100, &opts); + assert(map_fd >= 0); + + /* empty tree. get_next_key should return ENOENT */ +@@ -753,6 +756,7 @@ static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd) + + static void test_lpm_multi_thread(void) + { ++ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); + struct lpm_mt_test_info info[4]; + size_t key_size, value_size; + pthread_t thread_id[4]; +@@ -762,8 +766,7 @@ static void test_lpm_multi_thread(void) + /* create a trie */ + value_size = sizeof(__u32); + key_size = sizeof(struct bpf_lpm_trie_key) + value_size; +- map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size, +- 100, BPF_F_NO_PREALLOC); ++ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, value_size, 100, &opts); + + /* create 4 threads to test update, delete, lookup and get_next_key */ + setup_lpm_mt_test_info(&info[0], map_fd); +diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c +index 7f3d1d8460b4..b9f1bbbc8aba 100644 +--- a/tools/testing/selftests/bpf/test_lru_map.c ++++ b/tools/testing/selftests/bpf/test_lru_map.c +@@ -28,13 +28,14 @@ static int nr_cpus; + + static int create_map(int map_type, int map_flags, unsigned int size) + { ++ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); + int map_fd; + +- map_fd = bpf_create_map(map_type, sizeof(unsigned long long), +- sizeof(unsigned long long), size, map_flags); ++ map_fd = bpf_map_create(map_type, NULL, sizeof(unsigned long long), ++ sizeof(unsigned long long), size, &opts); + + if (map_fd == -1) +- perror("bpf_create_map"); ++ perror("bpf_map_create"); + + return map_fd; + } +@@ -42,7 +43,6 @@ static int create_map(int map_type, int map_flags, unsigned int size) + static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key, + void *value) + { +- struct bpf_create_map_attr map; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_9, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, fd), +@@ -63,13 +63,7 @@ static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key, + int mfd, pfd, ret, zero = 0; + __u32 retval = 0; + +- memset(&map, 0, sizeof(map)); +- map.map_type = BPF_MAP_TYPE_ARRAY; +- map.key_size = sizeof(int); +- map.value_size = sizeof(unsigned long long); +- map.max_entries = 1; +- +- mfd = bpf_create_map_xattr(&map); ++ mfd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(__u64), 1, NULL); + if (mfd < 0) + return -1; + +diff --git 
a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c +index 8b31bc1a801d..f4cd658bbe00 100644 +--- a/tools/testing/selftests/bpf/test_maps.c ++++ b/tools/testing/selftests/bpf/test_maps.c +@@ -33,15 +33,14 @@ + + static int skips; + +-static int map_flags; ++static struct bpf_map_create_opts map_opts = { .sz = sizeof(map_opts) }; + + static void test_hashmap(unsigned int task, void *data) + { + long long key, next_key, first_key, value; + int fd; + +- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), +- 2, map_flags); ++ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), 2, &map_opts); + if (fd < 0) { + printf("Failed to create hashmap '%s'!\n", strerror(errno)); + exit(1); +@@ -138,8 +137,7 @@ static void test_hashmap_sizes(unsigned int task, void *data) + + for (i = 1; i <= 512; i <<= 1) + for (j = 1; j <= 1 << 18; j <<= 1) { +- fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j, +- 2, map_flags); ++ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, i, j, 2, &map_opts); + if (fd < 0) { + if (errno == ENOMEM) + return; +@@ -160,8 +158,8 @@ static void test_hashmap_percpu(unsigned int task, void *data) + int expected_key_mask = 0; + int fd, i; + +- fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key), +- sizeof(bpf_percpu(value, 0)), 2, map_flags); ++ fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_HASH, NULL, sizeof(key), ++ sizeof(bpf_percpu(value, 0)), 2, &map_opts); + if (fd < 0) { + printf("Failed to create hashmap '%s'!\n", strerror(errno)); + exit(1); +@@ -272,11 +270,11 @@ static int helper_fill_hashmap(int max_entries) + int i, fd, ret; + long long key, value; + +- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), +- max_entries, map_flags); ++ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), ++ max_entries, &map_opts); + CHECK(fd < 0, + "failed to create hashmap", +- "err: %s, flags: 0x%x\n", strerror(errno), map_flags); ++ "err: %s, flags: 0x%x\n", strerror(errno), map_opts.map_flags); + + for (i = 0; i < max_entries; i++) { + key = i; value = key; +@@ -332,8 +330,8 @@ static void test_hashmap_zero_seed(void) + int i, first, second, old_flags; + long long key, next_first, next_second; + +- old_flags = map_flags; +- map_flags |= BPF_F_ZERO_SEED; ++ old_flags = map_opts.map_flags; ++ map_opts.map_flags |= BPF_F_ZERO_SEED; + + first = helper_fill_hashmap(3); + second = helper_fill_hashmap(3); +@@ -355,7 +353,7 @@ static void test_hashmap_zero_seed(void) + key = next_first; + } + +- map_flags = old_flags; ++ map_opts.map_flags = old_flags; + close(first); + close(second); + } +@@ -365,8 +363,7 @@ static void test_arraymap(unsigned int task, void *data) + int key, next_key, fd; + long long value; + +- fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), +- 2, 0); ++ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(key), sizeof(value), 2, NULL); + if (fd < 0) { + printf("Failed to create arraymap '%s'!\n", strerror(errno)); + exit(1); +@@ -421,8 +418,8 @@ static void test_arraymap_percpu(unsigned int task, void *data) + BPF_DECLARE_PERCPU(long, values); + int key, next_key, fd, i; + +- fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), +- sizeof(bpf_percpu(values, 0)), 2, 0); ++ fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, sizeof(key), ++ sizeof(bpf_percpu(values, 0)), 2, NULL); + if (fd < 0) { + printf("Failed to create arraymap '%s'!\n", strerror(errno)); + exit(1); +@@ -484,8 +481,8 @@ static void test_arraymap_percpu_many_keys(void) + unsigned int 
nr_keys = 2000; + int key, fd, i; + +- fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), +- sizeof(bpf_percpu(values, 0)), nr_keys, 0); ++ fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, sizeof(key), ++ sizeof(bpf_percpu(values, 0)), nr_keys, NULL); + if (fd < 0) { + printf("Failed to create per-cpu arraymap '%s'!\n", + strerror(errno)); +@@ -516,8 +513,7 @@ static void test_devmap(unsigned int task, void *data) + int fd; + __u32 key, value; + +- fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value), +- 2, 0); ++ fd = bpf_map_create(BPF_MAP_TYPE_DEVMAP, NULL, sizeof(key), sizeof(value), 2, NULL); + if (fd < 0) { + printf("Failed to create devmap '%s'!\n", strerror(errno)); + exit(1); +@@ -531,8 +527,7 @@ static void test_devmap_hash(unsigned int task, void *data) + int fd; + __u32 key, value; + +- fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP_HASH, sizeof(key), sizeof(value), +- 2, 0); ++ fd = bpf_map_create(BPF_MAP_TYPE_DEVMAP_HASH, NULL, sizeof(key), sizeof(value), 2, NULL); + if (fd < 0) { + printf("Failed to create devmap_hash '%s'!\n", strerror(errno)); + exit(1); +@@ -552,14 +547,12 @@ static void test_queuemap(unsigned int task, void *data) + vals[i] = rand(); + + /* Invalid key size */ +- fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE, +- map_flags); ++ fd = bpf_map_create(BPF_MAP_TYPE_QUEUE, NULL, 4, sizeof(val), MAP_SIZE, &map_opts); + assert(fd < 0 && errno == EINVAL); + +- fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE, +- map_flags); ++ fd = bpf_map_create(BPF_MAP_TYPE_QUEUE, NULL, 0, sizeof(val), MAP_SIZE, &map_opts); + /* Queue map does not support BPF_F_NO_PREALLOC */ +- if (map_flags & BPF_F_NO_PREALLOC) { ++ if (map_opts.map_flags & BPF_F_NO_PREALLOC) { + assert(fd < 0 && errno == EINVAL); + return; + } +@@ -610,14 +603,12 @@ static void test_stackmap(unsigned int task, void *data) + vals[i] = rand(); + + /* Invalid key size */ +- fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE, +- map_flags); ++ fd = bpf_map_create(BPF_MAP_TYPE_STACK, NULL, 4, sizeof(val), MAP_SIZE, &map_opts); + assert(fd < 0 && errno == EINVAL); + +- fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE, +- map_flags); ++ fd = bpf_map_create(BPF_MAP_TYPE_STACK, NULL, 0, sizeof(val), MAP_SIZE, &map_opts); + /* Stack map does not support BPF_F_NO_PREALLOC */ +- if (map_flags & BPF_F_NO_PREALLOC) { ++ if (map_opts.map_flags & BPF_F_NO_PREALLOC) { + assert(fd < 0 && errno == EINVAL); + return; + } +@@ -744,9 +735,9 @@ static void test_sockmap(unsigned int tasks, void *data) + } + + /* Test sockmap with connected sockets */ +- fd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, ++ fd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, + sizeof(key), sizeof(value), +- 6, 0); ++ 6, NULL); + if (fd < 0) { + if (!bpf_probe_map_type(BPF_MAP_TYPE_SOCKMAP, 0)) { + printf("%s SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP)\n", +@@ -1168,8 +1159,7 @@ static void test_map_in_map(void) + + obj = bpf_object__open(MAPINMAP_PROG); + +- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int), sizeof(int), +- 2, 0); ++ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), sizeof(int), 2, NULL); + if (fd < 0) { + printf("Failed to create hashmap '%s'!\n", strerror(errno)); + exit(1); +@@ -1315,8 +1305,8 @@ static void test_map_large(void) + } key; + int fd, i, value; + +- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), +- MAP_SIZE, map_flags); ++ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), ++ MAP_SIZE, 
&map_opts); + if (fd < 0) { + printf("Failed to create large map '%s'!\n", strerror(errno)); + exit(1); +@@ -1469,8 +1459,8 @@ static void test_map_parallel(void) + int i, fd, key = 0, value = 0; + int data[2]; + +- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), +- MAP_SIZE, map_flags); ++ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), ++ MAP_SIZE, &map_opts); + if (fd < 0) { + printf("Failed to create map for parallel test '%s'!\n", + strerror(errno)); +@@ -1518,9 +1508,13 @@ static void test_map_parallel(void) + static void test_map_rdonly(void) + { + int fd, key = 0, value = 0; ++ __u32 old_flags; + +- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), +- MAP_SIZE, map_flags | BPF_F_RDONLY); ++ old_flags = map_opts.map_flags; ++ map_opts.map_flags |= BPF_F_RDONLY; ++ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), ++ MAP_SIZE, &map_opts); ++ map_opts.map_flags = old_flags; + if (fd < 0) { + printf("Failed to create map for read only test '%s'!\n", + strerror(errno)); +@@ -1543,9 +1537,13 @@ static void test_map_rdonly(void) + static void test_map_wronly_hash(void) + { + int fd, key = 0, value = 0; ++ __u32 old_flags; + +- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), +- MAP_SIZE, map_flags | BPF_F_WRONLY); ++ old_flags = map_opts.map_flags; ++ map_opts.map_flags |= BPF_F_WRONLY; ++ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), ++ MAP_SIZE, &map_opts); ++ map_opts.map_flags = old_flags; + if (fd < 0) { + printf("Failed to create map for write only test '%s'!\n", + strerror(errno)); +@@ -1567,13 +1565,17 @@ static void test_map_wronly_hash(void) + static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type) + { + int fd, value = 0; ++ __u32 old_flags; ++ + + assert(map_type == BPF_MAP_TYPE_QUEUE || + map_type == BPF_MAP_TYPE_STACK); +- fd = bpf_create_map(map_type, 0, sizeof(value), MAP_SIZE, +- map_flags | BPF_F_WRONLY); ++ old_flags = map_opts.map_flags; ++ map_opts.map_flags |= BPF_F_WRONLY; ++ fd = bpf_map_create(map_type, NULL, 0, sizeof(value), MAP_SIZE, &map_opts); ++ map_opts.map_flags = old_flags; + /* Stack/Queue maps do not support BPF_F_NO_PREALLOC */ +- if (map_flags & BPF_F_NO_PREALLOC) { ++ if (map_opts.map_flags & BPF_F_NO_PREALLOC) { + assert(fd < 0 && errno == EINVAL); + return; + } +@@ -1700,8 +1702,8 @@ static void test_reuseport_array(void) + __u32 fds_idx = 0; + int fd; + +- map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, +- sizeof(__u32), sizeof(__u64), array_size, 0); ++ map_fd = bpf_map_create(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, NULL, ++ sizeof(__u32), sizeof(__u64), array_size, NULL); + CHECK(map_fd < 0, "reuseport array create", + "map_fd:%d, errno:%d\n", map_fd, errno); + +@@ -1837,8 +1839,8 @@ static void test_reuseport_array(void) + close(map_fd); + + /* Test 32 bit fd */ +- map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, +- sizeof(__u32), sizeof(__u32), array_size, 0); ++ map_fd = bpf_map_create(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, NULL, ++ sizeof(__u32), sizeof(__u32), array_size, NULL); + CHECK(map_fd < 0, "reuseport array create", + "map_fd:%d, errno:%d\n", map_fd, errno); + prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64, +@@ -1896,10 +1898,10 @@ int main(void) + + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + +- map_flags = 0; ++ map_opts.map_flags = 0; + run_all_tests(); + +- map_flags = BPF_F_NO_PREALLOC; ++ map_opts.map_flags = BPF_F_NO_PREALLOC; + run_all_tests(); + + #define 
DEFINE_TEST(name) test_##name(); +diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c +index 5c7bea525626..0851c42ee31c 100644 +--- a/tools/testing/selftests/bpf/test_tag.c ++++ b/tools/testing/selftests/bpf/test_tag.c +@@ -185,11 +185,12 @@ static void do_test(uint32_t *tests, int start_insns, int fd_map, + + int main(void) + { ++ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); + uint32_t tests = 0; + int i, fd_map; + +- fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int), +- sizeof(int), 1, BPF_F_NO_PREALLOC); ++ fd_map = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), ++ sizeof(int), 1, &opts); + assert(fd_map > 0); + + for (i = 0; i < 5; i++) { +diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c +index e512b715a785..222cb063ddf4 100644 +--- a/tools/testing/selftests/bpf/test_verifier.c ++++ b/tools/testing/selftests/bpf/test_verifier.c +@@ -461,11 +461,11 @@ static int __create_map(uint32_t type, uint32_t size_key, + uint32_t size_value, uint32_t max_elem, + uint32_t extra_flags) + { ++ LIBBPF_OPTS(bpf_map_create_opts, opts); + int fd; + +- fd = bpf_create_map(type, size_key, size_value, max_elem, +- (type == BPF_MAP_TYPE_HASH ? +- BPF_F_NO_PREALLOC : 0) | extra_flags); ++ opts.map_flags = (type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0) | extra_flags; ++ fd = bpf_map_create(type, NULL, size_key, size_value, max_elem, &opts); + if (fd < 0) { + if (skip_unsupported_map(type)) + return -1; +@@ -521,8 +521,8 @@ static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem, + { + int mfd, p1fd, p2fd, p3fd; + +- mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int), +- sizeof(int), max_elem, 0); ++ mfd = bpf_map_create(BPF_MAP_TYPE_PROG_ARRAY, NULL, sizeof(int), ++ sizeof(int), max_elem, NULL); + if (mfd < 0) { + if (skip_unsupported_map(BPF_MAP_TYPE_PROG_ARRAY)) + return -1; +@@ -552,10 +552,11 @@ err: + + static int create_map_in_map(void) + { ++ LIBBPF_OPTS(bpf_map_create_opts, opts); + int inner_map_fd, outer_map_fd; + +- inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), +- sizeof(int), 1, 0); ++ inner_map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), ++ sizeof(int), 1, NULL); + if (inner_map_fd < 0) { + if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY)) + return -1; +@@ -563,8 +564,9 @@ static int create_map_in_map(void) + return inner_map_fd; + } + +- outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, +- sizeof(int), inner_map_fd, 1, 0); ++ opts.inner_map_fd = inner_map_fd; ++ outer_map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, ++ sizeof(int), sizeof(int), 1, &opts); + if (outer_map_fd < 0) { + if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY_OF_MAPS)) + return -1; +@@ -583,8 +585,8 @@ static int create_cgroup_storage(bool percpu) + BPF_MAP_TYPE_CGROUP_STORAGE; + int fd; + +- fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key), +- TEST_DATA_LEN, 0, 0); ++ fd = bpf_map_create(type, NULL, sizeof(struct bpf_cgroup_storage_key), ++ TEST_DATA_LEN, 0, NULL); + if (fd < 0) { + if (skip_unsupported_map(type)) + return -1; +@@ -648,22 +650,17 @@ static int load_btf(void) + + static int create_map_spin_lock(void) + { +- struct bpf_create_map_attr attr = { +- .name = "test_map", +- .map_type = BPF_MAP_TYPE_ARRAY, +- .key_size = 4, +- .value_size = 8, +- .max_entries = 1, ++ LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_key_type_id = 1, + .btf_value_type_id = 3, +- }; ++ ); + int fd, btf_fd; + 
+ btf_fd = load_btf(); + if (btf_fd < 0) + return -1; +- attr.btf_fd = btf_fd; +- fd = bpf_create_map_xattr(&attr); ++ opts.btf_fd = btf_fd; ++ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 8, 1, &opts); + if (fd < 0) + printf("Failed to create map with spin_lock\n"); + return fd; +@@ -671,24 +668,19 @@ static int create_map_spin_lock(void) + + static int create_sk_storage_map(void) + { +- struct bpf_create_map_attr attr = { +- .name = "test_map", +- .map_type = BPF_MAP_TYPE_SK_STORAGE, +- .key_size = 4, +- .value_size = 8, +- .max_entries = 0, ++ LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_NO_PREALLOC, + .btf_key_type_id = 1, + .btf_value_type_id = 3, +- }; ++ ); + int fd, btf_fd; + + btf_fd = load_btf(); + if (btf_fd < 0) + return -1; +- attr.btf_fd = btf_fd; +- fd = bpf_create_map_xattr(&attr); +- close(attr.btf_fd); ++ opts.btf_fd = btf_fd; ++ fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "test_map", 4, 8, 0, &opts); ++ close(opts.btf_fd); + if (fd < 0) + printf("Failed to create sk_storage_map\n"); + return fd; +-- +2.38.1 + diff --git a/patches.suse/selftests-bpf-Minor-cleanups-and-normalization-of-Ma.patch b/patches.suse/selftests-bpf-Minor-cleanups-and-normalization-of-Ma.patch new file mode 100644 index 0000000..ddb48b0 --- /dev/null +++ b/patches.suse/selftests-bpf-Minor-cleanups-and-normalization-of-Ma.patch @@ -0,0 +1,117 @@ +From: Andrii Nakryiko +Date: Wed, 10 Nov 2021 21:36:17 -0800 +Subject: selftests/bpf: Minor cleanups and normalization of Makefile +Patch-mainline: v5.17-rc1 +Git-commit: de29e6bbb9ee674d639cd42fe565f28757208614 +References: jsc#PED-1368 + +A few cleanups and single-line simplifications. Also split the CLEAN command +into multiple $(RM) invocations as it gets dangerously close to a too-long +argument list. Make sure that -o is always used as the last +argument for saner verbose make output.
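+
+For illustration, a rule in the normalized style (a made-up example, not
+one of the hunks below) would look like:
+
+	$(OUTPUT)/example: example.c
+		$(call msg,BINARY,,$@)
+		$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $< $(LDLIBS) -o $@
+
+with -o kept as the last argument so the produced binary stands out in
+verbose make output.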
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211111053624.190580-3-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/Makefile | 32 ++++++++++++++++---------------- + 1 file changed, 16 insertions(+), 16 deletions(-) + +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -45,10 +45,8 @@ ifneq ($(BPF_GCC),) + TEST_GEN_PROGS += test_progs-bpf_gcc + endif + +-TEST_GEN_FILES = test_lwt_ip_encap.o \ +- test_tc_edt.o +-TEST_FILES = xsk_prereqs.sh \ +- $(wildcard progs/btf_dump_test_case_*.c) ++TEST_GEN_FILES = test_lwt_ip_encap.o test_tc_edt.o ++TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c) + + # Order correspond to 'make run_tests' order + TEST_PROGS := test_kmod.sh \ +@@ -107,7 +105,10 @@ endif + OVERRIDE_TARGETS := 1 + override define CLEAN + $(call msg,CLEAN) +- $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) ++ $(Q)$(RM) -r $(TEST_GEN_PROGS) ++ $(Q)$(RM) -r $(TEST_GEN_PROGS_EXTENDED) ++ $(Q)$(RM) -r $(TEST_GEN_FILES) ++ $(Q)$(RM) -r $(EXTRA_CLEAN) + $(Q)$(MAKE) -C bpf_testmod clean + $(Q)$(MAKE) docs-clean + endef +@@ -169,7 +170,7 @@ $(OUTPUT)/%:%.c + + $(OUTPUT)/urandom_read: urandom_read.c + $(call msg,BINARY,,$@) +- $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id=sha1 ++ $(Q)$(CC) $(LDFLAGS) $< $(LDLIBS) -Wl,--build-id=sha1 -o $@ + + $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) + $(call msg,MOD,,$@) +@@ -232,16 +233,16 @@ docs-clean: + prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@ + + $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ +- ../../../include/uapi/linux/bpf.h \ ++ $(APIDIR)/linux/bpf.h \ + | $(BUILD_DIR)/libbpf + $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ + EXTRA_CFLAGS='-g -O0' \ + DESTDIR=$(SCRATCH_DIR) prefix= all install_headers + + ifneq ($(BPFOBJ),$(HOST_BPFOBJ)) +-$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ +- ../../../include/uapi/linux/bpf.h \ +- | $(HOST_BUILD_DIR)/libbpf ++$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ++ $(APIDIR)/linux/bpf.h \ ++ | $(HOST_BUILD_DIR)/libbpf + $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ + EXTRA_CFLAGS='-g -O0' \ + OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \ +@@ -305,12 +306,12 @@ $(OUTPUT)/flow_dissector_load.o: flow_di + # $3 - CFLAGS + define CLANG_BPF_BUILD_RULE + $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) +- $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v3 ++ $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v3 -o $2 + endef + # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 + define CLANG_NOALU32_BPF_BUILD_RULE + $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) +- $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v2 ++ $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v2 -o $2 + endef + # Build BPF object using GCC + define GCC_BPF_BUILD_RULE +@@ -472,13 +473,12 @@ TRUNNER_TESTS_DIR := prog_tests + TRUNNER_BPF_PROGS_DIR := progs + TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ + network_helpers.c testing_helpers.c \ +- btf_helpers.c flow_dissector_load.h ++ btf_helpers.c flow_dissector_load.h + TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ + ima_setup.sh \ + $(wildcard progs/btf_dump_test_case_*.c) + TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE +-TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) +-TRUNNER_BPF_CFLAGS += 
-DENABLE_ATOMICS_TESTS ++TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS + $(eval $(call DEFINE_TEST_RUNNER,test_progs)) + + # Define test_progs-no_alu32 test runner. +@@ -540,7 +540,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUT + $(OUTPUT)/bench_ringbufs.o \ + $(OUTPUT)/bench_bloom_filter_map.o + $(call msg,BINARY,,$@) +- $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) ++ $(Q)$(CC) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ + + EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ + prog_tests/tests.h map_tests/tests.h verifier/tests.h \ diff --git a/patches.suse/selftests-bpf-Mix-legacy-maps-and-modern-vars-BPF-in.patch b/patches.suse/selftests-bpf-Mix-legacy-maps-and-modern-vars-BPF-in.patch new file mode 100644 index 0000000..2313d12 --- /dev/null +++ b/patches.suse/selftests-bpf-Mix-legacy-maps-and-modern-vars-BPF-in.patch @@ -0,0 +1,174 @@ +From: Andrii Nakryiko +Date: Tue, 23 Nov 2021 12:01:05 -0800 +Subject: selftests/bpf: Mix legacy (maps) and modern (vars) BPF in one test +Patch-mainline: v5.17-rc1 +Git-commit: e4f7ac90c2b09766e4acf771908987391c836413 +References: jsc#PED-1368 + +Add a selftest that combines two BPF programs within a single BPF object +file such that one of the programs uses global variables but can be +skipped at runtime on old kernels that don't support global data. +Another BPF program is written with the goal of being runnable on very old +kernels and only relies on explicitly accessed BPF maps. + +Such a test, run against old kernels (e.g., libbpf CI will run it against a 4.9 +kernel that doesn't support global data), allows testing the approach +and ensuring that libbpf doesn't make unnecessary assumptions about +required kernel features. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Acked-by: Song Liu +Link: https://lore.kernel.org/bpf/20211123200105.387855-2-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/legacy_printk.c | 65 +++++++++++++++ + tools/testing/selftests/bpf/progs/test_legacy_printk.c | 73 +++++++++++++++++ + 2 files changed, 138 insertions(+) + create mode 100644 tools/testing/selftests/bpf/prog_tests/legacy_printk.c + create mode 100644 tools/testing/selftests/bpf/progs/test_legacy_printk.c + +--- /dev/null ++++ b/tools/testing/selftests/bpf/prog_tests/legacy_printk.c +@@ -0,0 +1,65 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Facebook */ ++#include <test_progs.h> ++#include "test_legacy_printk.skel.h" ++ ++static int execute_one_variant(bool legacy) ++{ ++ struct test_legacy_printk *skel; ++ int err, zero = 0, my_pid = getpid(), res, map_fd; ++ ++ skel = test_legacy_printk__open(); ++ if (!ASSERT_OK_PTR(skel, "skel_open")) ++ return -errno; ++ ++ bpf_program__set_autoload(skel->progs.handle_legacy, legacy); ++ bpf_program__set_autoload(skel->progs.handle_modern, !legacy); ++ ++ err = test_legacy_printk__load(skel); ++ /* no ASSERT_OK, we expect one of two variants can fail here */ ++ if (err) ++ goto err_out; ++ ++ if (legacy) { ++ map_fd = bpf_map__fd(skel->maps.my_pid_map); ++ err = bpf_map_update_elem(map_fd, &zero, &my_pid, BPF_ANY); ++ if (!ASSERT_OK(err, "my_pid_map_update")) ++ goto err_out; ++ err = bpf_map_lookup_elem(map_fd, &zero, &res); ++ } else { ++ skel->bss->my_pid_var = my_pid; ++ } ++ ++ err = test_legacy_printk__attach(skel); ++ if (!ASSERT_OK(err, "skel_attach")) ++ goto err_out; ++ ++ usleep(1); /* trigger */ ++ ++ if (legacy) { ++ map_fd = bpf_map__fd(skel->maps.res_map); ++ err =
bpf_map_lookup_elem(map_fd, &zero, &res); ++ if (!ASSERT_OK(err, "res_map_lookup")) ++ goto err_out; ++ } else { ++ res = skel->bss->res_var; ++ } ++ ++ if (!ASSERT_GT(res, 0, "res")) { ++ err = -EINVAL; ++ goto err_out; ++ } ++ ++err_out: ++ test_legacy_printk__destroy(skel); ++ return err; ++} ++ ++void test_legacy_printk(void) ++{ ++ /* legacy variant should work everywhere */ ++ ASSERT_OK(execute_one_variant(true /* legacy */), "legacy_case"); ++ ++ /* execute modern variant, can fail the load on old kernels */ ++ execute_one_variant(false); ++} +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/test_legacy_printk.c +@@ -0,0 +1,73 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Facebook */ ++ ++#include ++#define BPF_NO_GLOBAL_DATA ++#include ++ ++char LICENSE[] SEC("license") = "GPL"; ++ ++struct { ++ __uint(type, BPF_MAP_TYPE_ARRAY); ++ __type(key, int); ++ __type(value, int); ++ __uint(max_entries, 1); ++} my_pid_map SEC(".maps"); ++ ++struct { ++ __uint(type, BPF_MAP_TYPE_ARRAY); ++ __type(key, int); ++ __type(value, int); ++ __uint(max_entries, 1); ++} res_map SEC(".maps"); ++ ++volatile int my_pid_var = 0; ++volatile int res_var = 0; ++ ++SEC("tp/raw_syscalls/sys_enter") ++int handle_legacy(void *ctx) ++{ ++ int zero = 0, *my_pid, cur_pid, *my_res; ++ ++ my_pid = bpf_map_lookup_elem(&my_pid_map, &zero); ++ if (!my_pid) ++ return 1; ++ ++ cur_pid = bpf_get_current_pid_tgid() >> 32; ++ if (cur_pid != *my_pid) ++ return 1; ++ ++ my_res = bpf_map_lookup_elem(&res_map, &zero); ++ if (!my_res) ++ return 1; ++ ++ if (*my_res == 0) ++ /* use bpf_printk() in combination with BPF_NO_GLOBAL_DATA to ++ * force .rodata.str1.1 section that previously caused ++ * problems on old kernels due to libbpf always tried to ++ * create a global data map for it ++ */ ++ bpf_printk("Legacy-case bpf_printk test, pid %d\n", cur_pid); ++ *my_res = 1; ++ ++ return *my_res; ++} ++ ++SEC("tp/raw_syscalls/sys_enter") ++int handle_modern(void *ctx) ++{ ++ int zero = 0, cur_pid; ++ ++ cur_pid = bpf_get_current_pid_tgid() >> 32; ++ if (cur_pid != my_pid_var) ++ return 1; ++ ++ if (res_var == 0) ++ /* we need bpf_printk() to validate libbpf logic around unused ++ * global maps and legacy kernels; see comment in handle_legacy() ++ */ ++ bpf_printk("Modern-case bpf_printk test, pid %d\n", cur_pid); ++ res_var = 1; ++ ++ return res_var; ++} diff --git a/patches.suse/selftests-bpf-Move-summary-line-after-the-error-logs.patch b/patches.suse/selftests-bpf-Move-summary-line-after-the-error-logs.patch new file mode 100644 index 0000000..97a2829 --- /dev/null +++ b/patches.suse/selftests-bpf-Move-summary-line-after-the-error-logs.patch @@ -0,0 +1,48 @@ +From: Yucong Sun +Date: Fri, 12 Nov 2021 11:25:32 -0800 +Subject: selftests/bpf: Move summary line after the error logs +Patch-mainline: v5.17-rc1 +Git-commit: ea78548e0f98951fa7641037ad98a750137d6b6a +References: jsc#PED-1368 + +Makes it easier to find the summary line when there is a lot of logs to +scroll back. 
+
+Signed-off-by: Yucong Sun
+Signed-off-by: Andrii Nakryiko
+Link: https://lore.kernel.org/bpf/20211112192535.898352-2-fallentree@fb.com
+Acked-by: Shung-Hsi Yu
+---
+ tools/testing/selftests/bpf/test_progs.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/tools/testing/selftests/bpf/test_progs.c
++++ b/tools/testing/selftests/bpf/test_progs.c
+@@ -1198,11 +1198,11 @@ static int server_main(void)
+ env.sub_succ_cnt += result->sub_succ_cnt;
+ }
+
++ print_all_error_logs();
++
+ fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+ env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+
+- print_all_error_logs();
+-
+ /* reap all workers */
+ for (i = 0; i < env.workers; i++) {
+ int wstatus, pid;
+@@ -1484,11 +1484,11 @@ int main(int argc, char **argv)
+ if (env.list_test_names)
+ goto out;
+
++ print_all_error_logs();
++
+ fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+ env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+
+- print_all_error_logs();
+-
+ close(env.saved_netns_fd);
+ out:
+ if (!env.list_test_names && env.has_testmod)
diff --git a/patches.suse/selftests-bpf-Mute-xdpxceiver.c-s-deprecation-warnin.patch b/patches.suse/selftests-bpf-Mute-xdpxceiver.c-s-deprecation-warnin.patch
new file mode 100644
index 0000000..c4fbb28
--- /dev/null
+++ b/patches.suse/selftests-bpf-Mute-xdpxceiver.c-s-deprecation-warnin.patch
@@ -0,0 +1,34 @@
+From: Andrii Nakryiko
+Date: Wed, 1 Dec 2021 15:28:20 -0800
+Subject: selftests/bpf: Mute xdpxceiver.c's deprecation warnings
+Patch-mainline: v5.17-rc1
+Git-commit: 00872de6e1b004377f6036f95db43e2145606eb2
+References: jsc#PED-1368
+
+xdpxceiver.c is using AF_XDP APIs that are deprecated starting from
+libbpf 0.7. Until we migrate the test to libxdp or solve this issue in
+some other way, mute deprecation warnings within xdpxceiver.c.
+
+Signed-off-by: Andrii Nakryiko
+Signed-off-by: Alexei Starovoitov
+Link: https://lore.kernel.org/bpf/20211201232824.3166325-6-andrii@kernel.org
+Acked-by: Shung-Hsi Yu
+---
+ tools/testing/selftests/bpf/xdpxceiver.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/tools/testing/selftests/bpf/xdpxceiver.c
++++ b/tools/testing/selftests/bpf/xdpxceiver.c
+@@ -100,6 +100,12 @@
+ #include "xdpxceiver.h"
+ #include "../kselftest.h"
+
++/* AF_XDP APIs were moved into libxdp and marked as deprecated in libbpf.
++ * Until xdpxceiver is either moved or rewritten against libxdp, suppress
++ * deprecation warnings in this file
++ */
++#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
++
+ static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
+ static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
+ static const char *IP1 = "192.168.100.162";
diff --git a/patches.suse/selftests-bpf-Pass-sanitizer-flags-to-linker-through.patch b/patches.suse/selftests-bpf-Pass-sanitizer-flags-to-linker-through.patch
new file mode 100644
index 0000000..1c67555
--- /dev/null
+++ b/patches.suse/selftests-bpf-Pass-sanitizer-flags-to-linker-through.patch
@@ -0,0 +1,30 @@
+From: Andrii Nakryiko
+Date: Sun, 7 Nov 2021 08:55:13 -0800
+Subject: selftests/bpf: Pass sanitizer flags to linker through LDFLAGS
+Patch-mainline: v5.17-rc1
+Git-commit: 2a2cb45b727b7a1041f3d3d93414b774e66454bb
+References: jsc#PED-1368
+
+When adding -fsanitize=address to SAN_CFLAGS, it has to be passed both
+to the compiler through CFLAGS and to the linker through LDFLAGS. Add
+SAN_CFLAGS into LDFLAGS to allow building selftests with ASAN.
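+
+For example, with this change an ASAN build of the selftests can be
+driven entirely through SAN_CFLAGS (assuming an ASAN-capable toolchain):
+
+	make -C tools/testing/selftests/bpf SAN_CFLAGS=-fsanitize=address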
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Reviewed-by: Hengqi Chen +Link: https://lore.kernel.org/bpf/20211107165521.9240-2-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/Makefile | 1 + + 1 file changed, 1 insertion(+) + +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -24,6 +24,7 @@ SAN_CFLAGS ?= + CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \ + -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ + -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) ++LDFLAGS += $(SAN_CFLAGS) + LDLIBS += -lcap -lelf -lz -lrt -lpthread + + # Silence some warnings when compiled with clang diff --git a/patches.suse/selftests-bpf-Prevent-misaligned-memory-access-in-ge.patch b/patches.suse/selftests-bpf-Prevent-misaligned-memory-access-in-ge.patch new file mode 100644 index 0000000..a425305 --- /dev/null +++ b/patches.suse/selftests-bpf-Prevent-misaligned-memory-access-in-ge.patch @@ -0,0 +1,66 @@ +From: Andrii Nakryiko +Date: Tue, 23 Nov 2021 16:23:21 -0800 +Subject: selftests/bpf: Prevent misaligned memory access in get_stack_raw_tp + test +Patch-mainline: v5.17-rc1 +Git-commit: 6c4dedb7550aafd094f7d803668fd039545f4e57 +References: jsc#PED-1368 + +Perfbuf doesn't guarantee 8-byte alignment of the data like BPF ringbuf +does, so struct get_stack_trace_t can arrive not properly aligned for +subsequent u64 accesses. Easiest fix is to just copy data locally. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211124002325.1737739-10-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c | 14 ++++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c ++++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c +@@ -24,13 +24,19 @@ static void get_stack_print_output(void + { + bool good_kern_stack = false, good_user_stack = false; + const char *nonjit_func = "___bpf_prog_run"; +- struct get_stack_trace_t *e = data; ++ /* perfbuf-submitted data is 4-byte aligned, but we need 8-byte ++ * alignment, so copy data into a local variable, for simplicity ++ */ ++ struct get_stack_trace_t e; + int i, num_stack; + static __u64 cnt; + struct ksym *ks; + + cnt++; + ++ memset(&e, 0, sizeof(e)); ++ memcpy(&e, data, size <= sizeof(e) ? 
size : sizeof(e));
++
+ if (size < sizeof(struct get_stack_trace_t)) {
+ __u64 *raw_data = data;
+ bool found = false;
+@@ -57,19 +63,19 @@ static void get_stack_print_output(void
+ good_user_stack = true;
+ }
+ } else {
+- num_stack = e->kern_stack_size / sizeof(__u64);
++ num_stack = e.kern_stack_size / sizeof(__u64);
+ if (env.jit_enabled) {
+ good_kern_stack = num_stack > 0;
+ } else {
+ for (i = 0; i < num_stack; i++) {
+- ks = ksym_search(e->kern_stack[i]);
++ ks = ksym_search(e.kern_stack[i]);
+ if (ks && (strcmp(ks->name, nonjit_func) == 0)) {
+ good_kern_stack = true;
+ break;
+ }
+ }
+ }
+- if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0)
++ if (e.user_stack_size > 0 && e.user_stack_buildid_size > 0)
+ good_user_stack = true;
+ }
+
diff --git a/patches.suse/selftests-bpf-Prevent-out-of-bounds-stack-access-in-.patch b/patches.suse/selftests-bpf-Prevent-out-of-bounds-stack-access-in-.patch
new file mode 100644
index 0000000..dcce235
--- /dev/null
+++ b/patches.suse/selftests-bpf-Prevent-out-of-bounds-stack-access-in-.patch
@@ -0,0 +1,35 @@
+From: Andrii Nakryiko
+Date: Tue, 23 Nov 2021 16:23:23 -0800
+Subject: selftests/bpf: Prevent out-of-bounds stack access in test_bpffs
+Patch-mainline: v5.17-rc1
+Git-commit: 57428298b5acf2ba2dd98359c532774f6eaeecb3
+References: jsc#PED-1368
+
+Buf may not be zero-terminated, leading strstr() to access data beyond
+the intended buf[] array. Fix by forcing zero termination.
+
+Signed-off-by: Andrii Nakryiko
+Signed-off-by: Daniel Borkmann
+Link: https://lore.kernel.org/bpf/20211124002325.1737739-12-andrii@kernel.org
+Acked-by: Shung-Hsi Yu
+---
+ tools/testing/selftests/bpf/prog_tests/test_bpffs.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
++++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
+@@ -19,11 +19,13 @@ static int read_iter(char *file)
+ fd = open(file, 0);
+ if (fd < 0)
+ return -1;
+- while ((len = read(fd, buf, sizeof(buf))) > 0)
++ while ((len = read(fd, buf, sizeof(buf))) > 0) {
++ buf[sizeof(buf) - 1] = '\0';
+ if (strstr(buf, "iter")) {
+ close(fd);
+ return 0;
+ }
++ }
+ close(fd);
+ return -1;
+ }
diff --git a/patches.suse/selftests-bpf-Remove-all-the-uses-of-deprecated-bpf_.patch b/patches.suse/selftests-bpf-Remove-all-the-uses-of-deprecated-bpf_.patch
new file mode 100644
index 0000000..b7d3624
--- /dev/null
+++ b/patches.suse/selftests-bpf-Remove-all-the-uses-of-deprecated-bpf_.patch
@@ -0,0 +1,435 @@
+From: Andrii Nakryiko
+Date: Wed, 1 Dec 2021 15:28:21 -0800
+Subject: selftests/bpf: Remove all the uses of deprecated
+ bpf_prog_load_xattr()
+Patch-mainline: v5.17-rc1
+Git-commit: 186d1a86003ddcf0ec9e85e17ece868663106639
+References: jsc#PED-1368
+
+Migrate all the selftests that were still using bpf_prog_load_xattr().
+A few are converted to skeletons; the others use the bpf_object__open_file()
+API.
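+
+The common replacement pattern is roughly the following sketch
+(illustrative only; "prog.o" is a placeholder object file and error
+handling is trimmed):
+
+	#include <errno.h>
+	#include <bpf/libbpf.h>
+
+	/* open/tweak/load sequence replacing bpf_prog_load_xattr() */
+	static int load_first_prog_fd(struct bpf_object **pobj)
+	{
+		struct bpf_object *obj;
+		struct bpf_program *prog;
+		int err;
+
+		obj = bpf_object__open_file("prog.o", NULL);
+		err = libbpf_get_error(obj);
+		if (err)
+			return err;
+
+		/* per-program setup (type, flags, log level) goes here,
+		 * after open but before load */
+		prog = bpf_object__next_program(obj, NULL);
+		if (!prog) {
+			bpf_object__close(obj);
+			return -ENOENT;
+		}
+
+		err = bpf_object__load(obj);
+		if (err) {
+			bpf_object__close(obj);
+			return err;
+		}
+
+		*pobj = obj; /* caller closes with bpf_object__close() */
+		return bpf_program__fd(prog);
+	}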
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211201232824.3166325-7-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c | 28 ++++- + tools/testing/selftests/bpf/prog_tests/connect_force_port.c | 17 +-- + tools/testing/selftests/bpf/prog_tests/kfree_skb.c | 58 +++--------- + tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c | 12 +- + tools/testing/selftests/bpf/prog_tests/sockopt_multi.c | 12 +- + tools/testing/selftests/bpf/prog_tests/tcp_rtt.c | 21 +--- + tools/testing/selftests/bpf/prog_tests/test_global_funcs.c | 28 ++++- + tools/testing/selftests/bpf/test_sock_addr.c | 33 ++++-- + tools/testing/selftests/bpf/xdp_redirect_multi.c | 15 +-- + 9 files changed, 118 insertions(+), 106 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c ++++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +@@ -19,16 +19,28 @@ extern int extra_prog_load_log_flags; + + static int check_load(const char *file, enum bpf_prog_type type) + { +- struct bpf_prog_load_attr attr; + struct bpf_object *obj = NULL; +- int err, prog_fd; ++ struct bpf_program *prog; ++ int err; + +- memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); +- attr.file = file; +- attr.prog_type = type; +- attr.log_level = 4 | extra_prog_load_log_flags; +- attr.prog_flags = BPF_F_TEST_RND_HI32; +- err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); ++ obj = bpf_object__open_file(file, NULL); ++ err = libbpf_get_error(obj); ++ if (err) ++ return err; ++ ++ prog = bpf_object__next_program(obj, NULL); ++ if (!prog) { ++ err = -ENOENT; ++ goto err_out; ++ } ++ ++ bpf_program__set_type(prog, type); ++ bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); ++ bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags); ++ ++ err = bpf_object__load(obj); ++ ++err_out: + bpf_object__close(obj); + return err; + } +--- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c ++++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c +@@ -51,19 +51,20 @@ static int run_test(int cgroup_fd, int s + bool v4 = family == AF_INET; + __u16 expected_local_port = v4 ? 22222 : 22223; + __u16 expected_peer_port = 60000; +- struct bpf_prog_load_attr attr = { +- .file = v4 ? "./connect_force_port4.o" : +- "./connect_force_port6.o", +- }; + struct bpf_program *prog; + struct bpf_object *obj; +- int xlate_fd, fd, err; ++ const char *obj_file = v4 ? "connect_force_port4.o" : "connect_force_port6.o"; ++ int fd, err; + __u32 duration = 0; + +- err = bpf_prog_load_xattr(&attr, &obj, &xlate_fd); +- if (err) { +- log_err("Failed to load BPF object"); ++ obj = bpf_object__open_file(obj_file, NULL); ++ if (!ASSERT_OK_PTR(obj, "bpf_obj_open")) + return -1; ++ ++ err = bpf_object__load(obj); ++ if (!ASSERT_OK(err, "bpf_obj_load")) { ++ err = -EIO; ++ goto close_bpf_object; + } + + prog = bpf_object__find_program_by_title(obj, v4 ? 
+--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c ++++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + #include + #include ++#include "kfree_skb.skel.h" + + struct meta { + int ifindex; +@@ -58,16 +59,11 @@ void serial_test_kfree_skb(void) + .ctx_in = &skb, + .ctx_size_in = sizeof(skb), + }; +- struct bpf_prog_load_attr attr = { +- .file = "./kfree_skb.o", +- }; +- +- struct bpf_link *link = NULL, *link_fentry = NULL, *link_fexit = NULL; +- struct bpf_map *perf_buf_map, *global_data; +- struct bpf_program *prog, *fentry, *fexit; +- struct bpf_object *obj, *obj2 = NULL; ++ struct kfree_skb *skel = NULL; ++ struct bpf_link *link; ++ struct bpf_object *obj; + struct perf_buffer *pb = NULL; +- int err, kfree_skb_fd; ++ int err; + bool passed = false; + __u32 duration = 0; + const int zero = 0; +@@ -78,40 +74,27 @@ void serial_test_kfree_skb(void) + if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) + return; + +- err = bpf_prog_load_xattr(&attr, &obj2, &kfree_skb_fd); +- if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) +- goto close_prog; +- +- prog = bpf_object__find_program_by_title(obj2, "tp_btf/kfree_skb"); +- if (CHECK(!prog, "find_prog", "prog kfree_skb not found\n")) +- goto close_prog; +- fentry = bpf_object__find_program_by_title(obj2, "fentry/eth_type_trans"); +- if (CHECK(!fentry, "find_prog", "prog eth_type_trans not found\n")) +- goto close_prog; +- fexit = bpf_object__find_program_by_title(obj2, "fexit/eth_type_trans"); +- if (CHECK(!fexit, "find_prog", "prog eth_type_trans not found\n")) +- goto close_prog; +- +- global_data = bpf_object__find_map_by_name(obj2, ".bss"); +- if (CHECK(!global_data, "find global data", "not found\n")) ++ skel = kfree_skb__open_and_load(); ++ if (!ASSERT_OK_PTR(skel, "kfree_skb_skel")) + goto close_prog; + +- link = bpf_program__attach_raw_tracepoint(prog, NULL); ++ link = bpf_program__attach_raw_tracepoint(skel->progs.trace_kfree_skb, NULL); + if (!ASSERT_OK_PTR(link, "attach_raw_tp")) + goto close_prog; +- link_fentry = bpf_program__attach_trace(fentry); +- if (!ASSERT_OK_PTR(link_fentry, "attach fentry")) +- goto close_prog; +- link_fexit = bpf_program__attach_trace(fexit); +- if (!ASSERT_OK_PTR(link_fexit, "attach fexit")) ++ skel->links.trace_kfree_skb = link; ++ ++ link = bpf_program__attach_trace(skel->progs.fentry_eth_type_trans); ++ if (!ASSERT_OK_PTR(link, "attach fentry")) + goto close_prog; ++ skel->links.fentry_eth_type_trans = link; + +- perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map"); +- if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n")) ++ link = bpf_program__attach_trace(skel->progs.fexit_eth_type_trans); ++ if (!ASSERT_OK_PTR(link, "attach fexit")) + goto close_prog; ++ skel->links.fexit_eth_type_trans = link; + + /* set up perf buffer */ +- pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, ++ pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, + on_sample, NULL, &passed, NULL); + if (!ASSERT_OK_PTR(pb, "perf_buf__new")) + goto close_prog; +@@ -133,7 +116,7 @@ void serial_test_kfree_skb(void) + */ + ASSERT_TRUE(passed, "passed"); + +- err = bpf_map_lookup_elem(bpf_map__fd(global_data), &zero, test_ok); ++ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.bss), &zero, test_ok); + if (CHECK(err, "get_result", + "failed to get output data: %d\n", err)) + goto close_prog; +@@ -141,9 +124,6 @@ void serial_test_kfree_skb(void) + CHECK_FAIL(!test_ok[0] || !test_ok[1]); + close_prog: + 
perf_buffer__free(pb); +- bpf_link__destroy(link); +- bpf_link__destroy(link_fentry); +- bpf_link__destroy(link_fexit); + bpf_object__close(obj); +- bpf_object__close(obj2); ++ kfree_skb__destroy(skel); + } +--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c ++++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c +@@ -167,20 +167,20 @@ static int prog_attach(struct bpf_object + + static void run_test(int cgroup_fd) + { +- struct bpf_prog_load_attr attr = { +- .file = "./sockopt_inherit.o", +- }; + int server_fd = -1, client_fd; + struct bpf_object *obj; + void *server_err; + pthread_t tid; +- int ignored; + int err; + +- err = bpf_prog_load_xattr(&attr, &obj, &ignored); +- if (CHECK_FAIL(err)) ++ obj = bpf_object__open_file("sockopt_inherit.o", NULL); ++ if (!ASSERT_OK_PTR(obj, "obj_open")) + return; + ++ err = bpf_object__load(obj); ++ if (!ASSERT_OK(err, "obj_load")) ++ goto close_bpf_object; ++ + err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt"); + if (CHECK_FAIL(err)) + goto close_bpf_object; +--- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c ++++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c +@@ -297,14 +297,10 @@ detach: + + void test_sockopt_multi(void) + { +- struct bpf_prog_load_attr attr = { +- .file = "./sockopt_multi.o", +- }; + int cg_parent = -1, cg_child = -1; + struct bpf_object *obj = NULL; + int sock_fd = -1; + int err = -1; +- int ignored; + + cg_parent = test__join_cgroup("/parent"); + if (CHECK_FAIL(cg_parent < 0)) +@@ -314,8 +310,12 @@ void test_sockopt_multi(void) + if (CHECK_FAIL(cg_child < 0)) + goto out; + +- err = bpf_prog_load_xattr(&attr, &obj, &ignored); +- if (CHECK_FAIL(err)) ++ obj = bpf_object__open_file("sockopt_multi.o", NULL); ++ if (!ASSERT_OK_PTR(obj, "obj_load")) ++ goto out; ++ ++ err = bpf_object__load(obj); ++ if (!ASSERT_OK(err, "obj_load")) + goto out; + + sock_fd = socket(AF_INET, SOCK_STREAM, 0); +--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c ++++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +@@ -2,6 +2,7 @@ + #include + #include "cgroup_helpers.h" + #include "network_helpers.h" ++#include "tcp_rtt.skel.h" + + struct tcp_rtt_storage { + __u32 invoked; +@@ -91,26 +92,18 @@ static int verify_sk(int map_fd, int cli + + static int run_test(int cgroup_fd, int server_fd) + { +- struct bpf_prog_load_attr attr = { +- .prog_type = BPF_PROG_TYPE_SOCK_OPS, +- .file = "./tcp_rtt.o", +- .expected_attach_type = BPF_CGROUP_SOCK_OPS, +- }; +- struct bpf_object *obj; +- struct bpf_map *map; ++ struct tcp_rtt *skel; + int client_fd; + int prog_fd; + int map_fd; + int err; + +- err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); +- if (err) { +- log_err("Failed to load BPF object"); ++ skel = tcp_rtt__open_and_load(); ++ if (!ASSERT_OK_PTR(skel, "skel_open_load")) + return -1; +- } + +- map = bpf_object__next_map(obj, NULL); +- map_fd = bpf_map__fd(map); ++ map_fd = bpf_map__fd(skel->maps.socket_storage_map); ++ prog_fd = bpf_program__fd(skel->progs._sockops); + + err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0); + if (err) { +@@ -149,7 +142,7 @@ close_client_fd: + close(client_fd); + + close_bpf_object: +- bpf_object__close(obj); ++ tcp_rtt__destroy(skel); + return err; + } + +--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c ++++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +@@ -30,17 +30,29 @@ extern int extra_prog_load_log_flags; + + static int check_load(const char *file) + { +- struct bpf_prog_load_attr attr; + struct bpf_object *obj = NULL; +- int 
err, prog_fd; ++ struct bpf_program *prog; ++ int err; + +- memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); +- attr.file = file; +- attr.prog_type = BPF_PROG_TYPE_UNSPEC; +- attr.log_level = extra_prog_load_log_flags; +- attr.prog_flags = BPF_F_TEST_RND_HI32; + found = false; +- err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); ++ ++ obj = bpf_object__open_file(file, NULL); ++ err = libbpf_get_error(obj); ++ if (err) ++ return err; ++ ++ prog = bpf_object__next_program(obj, NULL); ++ if (!prog) { ++ err = -ENOENT; ++ goto err_out; ++ } ++ ++ bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); ++ bpf_program__set_log_level(prog, extra_prog_load_log_flags); ++ ++ err = bpf_object__load(obj); ++ ++err_out: + bpf_object__close(obj); + return err; + } +--- a/tools/testing/selftests/bpf/test_sock_addr.c ++++ b/tools/testing/selftests/bpf/test_sock_addr.c +@@ -663,23 +663,36 @@ static int load_insns(const struct sock_ + + static int load_path(const struct sock_addr_test *test, const char *path) + { +- struct bpf_prog_load_attr attr; + struct bpf_object *obj; +- int prog_fd; ++ struct bpf_program *prog; ++ int err; + +- memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); +- attr.file = path; +- attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; +- attr.expected_attach_type = test->expected_attach_type; +- attr.prog_flags = BPF_F_TEST_RND_HI32; ++ obj = bpf_object__open_file(path, NULL); ++ err = libbpf_get_error(obj); ++ if (err) { ++ log_err(">>> Opening BPF object (%s) error.\n", path); ++ return -1; ++ } ++ ++ prog = bpf_object__next_program(obj, NULL); ++ if (!prog) ++ goto err_out; + +- if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) { ++ bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR); ++ bpf_program__set_expected_attach_type(prog, test->expected_attach_type); ++ bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); ++ ++ err = bpf_object__load(obj); ++ if (err) { + if (test->expected_result != LOAD_REJECT) + log_err(">>> Loading program (%s) error.\n", path); +- return -1; ++ goto err_out; + } + +- return prog_fd; ++ return bpf_program__fd(prog); ++err_out: ++ bpf_object__close(obj); ++ return -1; + } + + static int bind4_prog_load(const struct sock_addr_test *test) +--- a/tools/testing/selftests/bpf/xdp_redirect_multi.c ++++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c +@@ -85,10 +85,7 @@ int main(int argc, char **argv) + { + int prog_fd, group_all, mac_map; + struct bpf_program *ingress_prog, *egress_prog; +- struct bpf_prog_load_attr prog_load_attr = { +- .prog_type = BPF_PROG_TYPE_UNSPEC, +- }; +- int i, ret, opt, egress_prog_fd = 0; ++ int i, err, ret, opt, egress_prog_fd = 0; + struct bpf_devmap_val devmap_val; + bool attach_egress_prog = false; + unsigned char mac_addr[6]; +@@ -147,10 +144,14 @@ int main(int argc, char **argv) + printf("\n"); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); +- prog_load_attr.file = filename; +- +- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) ++ obj = bpf_object__open_file(filename, NULL); ++ err = libbpf_get_error(obj); ++ if (err) ++ goto err_out; ++ err = bpf_object__load(obj); ++ if (err) + goto err_out; ++ prog_fd = bpf_program__fd(bpf_object__next_program(obj, NULL)); + + if (attach_egress_prog) + group_all = bpf_object__find_map_fd_by_name(obj, "map_egress"); diff --git a/patches.suse/selftests-bpf-Remove-explicit-setrlimit-RLIMIT_MEMLO.patch b/patches.suse/selftests-bpf-Remove-explicit-setrlimit-RLIMIT_MEMLO.patch new file mode 100644 index 0000000..97efb72 --- /dev/null +++ 
b/patches.suse/selftests-bpf-Remove-explicit-setrlimit-RLIMIT_MEMLO.patch @@ -0,0 +1,145 @@ +From: Andrii Nakryiko +Date: Tue, 14 Dec 2021 11:59:04 -0800 +Subject: selftests/bpf: Remove explicit setrlimit(RLIMIT_MEMLOCK) in main + selftests +Patch-mainline: v5.17-rc1 +Git-commit: c164b8b40422ef5c643d08bbc63280e1e1610573 +References: jsc#PED-1368 + +As libbpf now is able to automatically take care of RLIMIT_MEMLOCK +increase (or skip it altogether on recent enough kernels), remove +explicit setrlimit() invocations in bench, test_maps, test_verifier, and +test_progs. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211214195904.1785155-3-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/bench.c | 16 -------------- + tools/testing/selftests/bpf/prog_tests/btf.c | 1 + tools/testing/selftests/bpf/prog_tests/select_reuseport.c | 1 + tools/testing/selftests/bpf/prog_tests/sk_lookup.c | 1 + tools/testing/selftests/bpf/prog_tests/sock_fields.c | 1 + tools/testing/selftests/bpf/test_maps.c | 1 + tools/testing/selftests/bpf/test_progs.c | 2 - + tools/testing/selftests/bpf/test_verifier.c | 4 ++- + 8 files changed, 3 insertions(+), 24 deletions(-) + +--- a/tools/testing/selftests/bpf/bench.c ++++ b/tools/testing/selftests/bpf/bench.c +@@ -29,26 +29,10 @@ static int libbpf_print_fn(enum libbpf_p + return vfprintf(stderr, format, args); + } + +-static int bump_memlock_rlimit(void) +-{ +- struct rlimit rlim_new = { +- .rlim_cur = RLIM_INFINITY, +- .rlim_max = RLIM_INFINITY, +- }; +- +- return setrlimit(RLIMIT_MEMLOCK, &rlim_new); +-} +- + void setup_libbpf(void) + { +- int err; +- + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + libbpf_set_print(libbpf_print_fn); +- +- err = bump_memlock_rlimit(); +- if (err) +- fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err); + } + + void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns) +--- a/tools/testing/selftests/bpf/prog_tests/btf.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf.c +@@ -22,7 +22,6 @@ + #include + #include + +-#include "bpf_rlimit.h" + #include "bpf_util.h" + #include "../test_btf.h" + #include "test_progs.h" +--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c ++++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +@@ -18,7 +18,6 @@ + #include + #include + #include +-#include "bpf_rlimit.h" + #include "bpf_util.h" + + #include "test_progs.h" +--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c ++++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +@@ -30,7 +30,6 @@ + #include + + #include "test_progs.h" +-#include "bpf_rlimit.h" + #include "bpf_util.h" + #include "cgroup_helpers.h" + #include "network_helpers.h" +--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c ++++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c +@@ -15,7 +15,6 @@ + #include "network_helpers.h" + #include "cgroup_helpers.h" + #include "test_progs.h" +-#include "bpf_rlimit.h" + #include "test_sock_fields.skel.h" + + enum bpf_linum_array_idx { +--- a/tools/testing/selftests/bpf/test_maps.c ++++ b/tools/testing/selftests/bpf/test_maps.c +@@ -23,7 +23,6 @@ + #include + + #include "bpf_util.h" +-#include "bpf_rlimit.h" + #include "test_maps.h" + #include "testing_helpers.h" + +--- a/tools/testing/selftests/bpf/test_progs.c ++++ b/tools/testing/selftests/bpf/test_progs.c +@@ -4,7 +4,6 @@ + #define _GNU_SOURCE + #include "test_progs.h" + #include "cgroup_helpers.h" +-#include "bpf_rlimit.h" + #include + #include + 
#include +@@ -1342,7 +1341,6 @@ int main(int argc, char **argv) + + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); +- + libbpf_set_print(libbpf_print_fn); + + srand(time(NULL)); +--- a/tools/testing/selftests/bpf/test_verifier.c ++++ b/tools/testing/selftests/bpf/test_verifier.c +@@ -41,7 +41,6 @@ + # define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1 + # endif + #endif +-#include "bpf_rlimit.h" + #include "bpf_rand.h" + #include "bpf_util.h" + #include "test_btf.h" +@@ -1385,6 +1384,9 @@ int main(int argc, char **argv) + return EXIT_FAILURE; + } + ++ /* Use libbpf 1.0 API mode */ ++ libbpf_set_strict_mode(LIBBPF_STRICT_ALL); ++ + bpf_semi_rand_init(); + return do_test(unpriv, from, to); + } diff --git a/patches.suse/selftests-bpf-Remove-last-bpf_create_map_xattr-from-.patch b/patches.suse/selftests-bpf-Remove-last-bpf_create_map_xattr-from-.patch new file mode 100644 index 0000000..d3f10fa --- /dev/null +++ b/patches.suse/selftests-bpf-Remove-last-bpf_create_map_xattr-from-.patch @@ -0,0 +1,48 @@ +From: Andrii Nakryiko +Date: Sun, 12 Dec 2021 11:13:41 -0800 +Subject: selftests/bpf: Remove last bpf_create_map_xattr from test_verifier +Patch-mainline: v5.17-rc1 +Git-commit: f12468828c28ff90d20c99b234a94223401f7924 +References: jsc#PED-1368 + +bpf_create_map_xattr() call was reintroduced after merging bpf tree into +bpf-next tree. Convert the last instance into bpf_map_create() call. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211212191341.2529573-1-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/test_verifier.c | 14 +++++--------- + 1 file changed, 5 insertions(+), 9 deletions(-) + +--- a/tools/testing/selftests/bpf/test_verifier.c ++++ b/tools/testing/selftests/bpf/test_verifier.c +@@ -697,22 +697,18 @@ static int create_sk_storage_map(void) + + static int create_map_timer(void) + { +- struct bpf_create_map_attr attr = { +- .name = "test_map", +- .map_type = BPF_MAP_TYPE_ARRAY, +- .key_size = 4, +- .value_size = 16, +- .max_entries = 1, ++ LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_key_type_id = 1, + .btf_value_type_id = 5, +- }; ++ ); + int fd, btf_fd; + + btf_fd = load_btf(); + if (btf_fd < 0) + return -1; +- attr.btf_fd = btf_fd; +- fd = bpf_create_map_xattr(&attr); ++ ++ opts.btf_fd = btf_fd; ++ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 16, 1, &opts); + if (fd < 0) + printf("Failed to create map with timer\n"); + return fd; diff --git a/patches.suse/selftests-bpf-Remove-recently-reintroduced-legacy-bt.patch b/patches.suse/selftests-bpf-Remove-recently-reintroduced-legacy-bt.patch new file mode 100644 index 0000000..370d4f1 --- /dev/null +++ b/patches.suse/selftests-bpf-Remove-recently-reintroduced-legacy-bt.patch @@ -0,0 +1,38 @@ +From: Andrii Nakryiko +Date: Wed, 1 Dec 2021 15:28:19 -0800 +Subject: selftests/bpf: Remove recently reintroduced legacy btf__dedup() use +Patch-mainline: v5.17-rc1 +Git-commit: 045b233a29a2ea3a168296f000cd5b1c08c4a2f7 +References: jsc#PED-1368 + +We've added one extra patch that added back the use of legacy +btf__dedup() variant. Clean that up. 
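+
+The difference is only the dropped btf_ext argument; as a minimal sketch
+(hypothetical btf pointer, error handling elided):
+
+	/* legacy libbpf: btf__dedup(btf, btf_ext, opts) */
+	err = btf__dedup(btf, NULL, NULL);
+
+	/* current libbpf: btf__dedup(btf, opts); NULL opts = defaults */
+	err = btf__dedup(btf, NULL);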
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211201232824.3166325-5-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c +@@ -364,7 +364,7 @@ static void test_split_dup_struct_in_cu( + "\t'f2' type_id=1 bits_offset=32"); + + /* ..dedup them... */ +- err = btf__dedup(btf1, NULL, NULL); ++ err = btf__dedup(btf1, NULL); + if (!ASSERT_OK(err, "btf_dedup")) + goto cleanup; + +@@ -405,7 +405,7 @@ static void test_split_dup_struct_in_cu( + "\t'f1' type_id=4 bits_offset=0\n" + "\t'f2' type_id=4 bits_offset=32"); + +- err = btf__dedup(btf2, NULL, NULL); ++ err = btf__dedup(btf2, NULL); + if (!ASSERT_OK(err, "btf_dedup")) + goto cleanup; + diff --git a/patches.suse/selftests-bpf-Remove-the-only-use-of-deprecated-bpf_.patch b/patches.suse/selftests-bpf-Remove-the-only-use-of-deprecated-bpf_.patch new file mode 100644 index 0000000..0df0535 --- /dev/null +++ b/patches.suse/selftests-bpf-Remove-the-only-use-of-deprecated-bpf_.patch @@ -0,0 +1,50 @@ +From: Andrii Nakryiko +Date: Thu, 9 Dec 2021 11:38:39 -0800 +Subject: selftests/bpf: Remove the only use of deprecated + bpf_object__load_xattr() +Patch-mainline: v5.17-rc1 +Git-commit: 3fc5fdcca144badbaf29b62aacbf7877f2f39a74 +References: jsc#PED-1368 + +Switch from bpf_object__load_xattr() to bpf_object__load() and +kernel_log_level in bpf_object_open_opts. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211209193840.1248570-12-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/testing_helpers.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/tools/testing/selftests/bpf/testing_helpers.c ++++ b/tools/testing/selftests/bpf/testing_helpers.c +@@ -88,13 +88,15 @@ int extra_prog_load_log_flags = 0; + int bpf_prog_test_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd) + { +- struct bpf_object_load_attr attr = {}; ++ LIBBPF_OPTS(bpf_object_open_opts, opts, ++ .kernel_log_level = extra_prog_load_log_flags, ++ ); + struct bpf_object *obj; + struct bpf_program *prog; + __u32 flags; + int err; + +- obj = bpf_object__open(file); ++ obj = bpf_object__open_file(file, &opts); + if (!obj) + return -errno; + +@@ -110,9 +112,7 @@ int bpf_prog_test_load(const char *file, + flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32; + bpf_program__set_flags(prog, flags); + +- attr.obj = obj; +- attr.log_level = extra_prog_load_log_flags; +- err = bpf_object__load_xattr(&attr); ++ err = bpf_object__load(obj); + if (err) + goto err_out; + diff --git a/patches.suse/selftests-bpf-Rename-progs-tag.c-to-progs-btf_decl_t.patch b/patches.suse/selftests-bpf-Rename-progs-tag.c-to-progs-btf_decl_t.patch new file mode 100644 index 0000000..9b6b038 --- /dev/null +++ b/patches.suse/selftests-bpf-Rename-progs-tag.c-to-progs-btf_decl_t.patch @@ -0,0 +1,175 @@ +From: Yonghong Song +Date: Thu, 11 Nov 2021 17:26:41 -0800 +Subject: selftests/bpf: Rename progs/tag.c to progs/btf_decl_tag.c +Patch-mainline: v5.17-rc1 +Git-commit: 26c79fcbfa64b18ca1407a3be7ac3442aef51073 +References: jsc#PED-1368 + +Rename progs/tag.c to progs/btf_decl_tag.c so we can introduce +progs/btf_type_tag.c in the next patch. 
+ +Also create a subtest for btf_decl_tag in prog_tests/btf_tag.c +so we can introduce btf_type_tag subtest in the next patch. + +I also took opportunity to remove the check whether __has_attribute +is defined or not in progs/btf_decl_tag.c since all recent +clangs should already support this macro. + +Signed-off-by: Yonghong Song +Signed-off-by: Alexei Starovoitov +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211112012641.1507144-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/btf_tag.c | 20 +++++--- + tools/testing/selftests/bpf/progs/btf_decl_tag.c | 50 +++++++++++++++++++++ + tools/testing/selftests/bpf/progs/tag.c | 54 ----------------------- + 3 files changed, 63 insertions(+), 61 deletions(-) + rename tools/testing/selftests/bpf/progs/{tag.c => btf_decl_tag.c} (94%) + +--- a/tools/testing/selftests/bpf/prog_tests/btf_tag.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c +@@ -1,20 +1,26 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright (c) 2021 Facebook */ + #include +-#include "tag.skel.h" ++#include "btf_decl_tag.skel.h" + +-void test_btf_tag(void) ++static void test_btf_decl_tag(void) + { +- struct tag *skel; ++ struct btf_decl_tag *skel; + +- skel = tag__open_and_load(); +- if (!ASSERT_OK_PTR(skel, "btf_tag")) ++ skel = btf_decl_tag__open_and_load(); ++ if (!ASSERT_OK_PTR(skel, "btf_decl_tag")) + return; + + if (skel->rodata->skip_tests) { +- printf("%s:SKIP: btf_tag attribute not supported", __func__); ++ printf("%s:SKIP: btf_decl_tag attribute not supported", __func__); + test__skip(); + } + +- tag__destroy(skel); ++ btf_decl_tag__destroy(skel); ++} ++ ++void test_btf_tag(void) ++{ ++ if (test__start_subtest("btf_decl_tag")) ++ test_btf_decl_tag(); + } +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/btf_decl_tag.c +@@ -0,0 +1,50 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Facebook */ ++#include "vmlinux.h" ++#include ++#include ++ ++#if __has_attribute(btf_decl_tag) ++#define __tag1 __attribute__((btf_decl_tag("tag1"))) ++#define __tag2 __attribute__((btf_decl_tag("tag2"))) ++volatile const bool skip_tests __tag1 __tag2 = false; ++#else ++#define __tag1 ++#define __tag2 ++volatile const bool skip_tests = true; ++#endif ++ ++struct key_t { ++ int a; ++ int b __tag1 __tag2; ++ int c; ++} __tag1 __tag2; ++ ++typedef struct { ++ int a; ++ int b; ++} value_t __tag1 __tag2; ++ ++struct { ++ __uint(type, BPF_MAP_TYPE_HASH); ++ __uint(max_entries, 3); ++ __type(key, struct key_t); ++ __type(value, value_t); ++} hashmap1 SEC(".maps"); ++ ++ ++static __noinline int foo(int x __tag1 __tag2) __tag1 __tag2 ++{ ++ struct key_t key; ++ value_t val = {}; ++ ++ key.a = key.b = key.c = x; ++ bpf_map_update_elem(&hashmap1, &key, &val, 0); ++ return 0; ++} ++ ++SEC("fentry/bpf_fentry_test1") ++int BPF_PROG(sub, int x) ++{ ++ return foo(x); ++} +--- a/tools/testing/selftests/bpf/progs/tag.c ++++ /dev/null +@@ -1,54 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0 +-/* Copyright (c) 2021 Facebook */ +-#include "vmlinux.h" +-#include +-#include +- +-#ifndef __has_attribute +-#define __has_attribute(x) 0 +-#endif +- +-#if __has_attribute(btf_decl_tag) +-#define __tag1 __attribute__((btf_decl_tag("tag1"))) +-#define __tag2 __attribute__((btf_decl_tag("tag2"))) +-volatile const bool skip_tests __tag1 __tag2 = false; +-#else +-#define __tag1 +-#define __tag2 +-volatile const bool skip_tests = true; +-#endif +- +-struct key_t { +- int a; +- int b __tag1 __tag2; +- int c; +-} __tag1 __tag2; +- +-typedef struct { +- 
int a; +- int b; +-} value_t __tag1 __tag2; +- +-struct { +- __uint(type, BPF_MAP_TYPE_HASH); +- __uint(max_entries, 3); +- __type(key, struct key_t); +- __type(value, value_t); +-} hashmap1 SEC(".maps"); +- +- +-static __noinline int foo(int x __tag1 __tag2) __tag1 __tag2 +-{ +- struct key_t key; +- value_t val = {}; +- +- key.a = key.b = key.c = x; +- bpf_map_update_elem(&hashmap1, &key, &val, 0); +- return 0; +-} +- +-SEC("fentry/bpf_fentry_test1") +-int BPF_PROG(sub, int x) +-{ +- return foo(x); +-} diff --git a/patches.suse/selftests-bpf-Replace-all-uses-of-bpf_load_btf-with-.patch b/patches.suse/selftests-bpf-Replace-all-uses-of-bpf_load_btf-with-.patch new file mode 100644 index 0000000..e1d2971 --- /dev/null +++ b/patches.suse/selftests-bpf-Replace-all-uses-of-bpf_load_btf-with-.patch @@ -0,0 +1,151 @@ +From: Andrii Nakryiko +Date: Thu, 9 Dec 2021 11:38:37 -0800 +Subject: selftests/bpf: Replace all uses of bpf_load_btf() with bpf_btf_load() +Patch-mainline: v5.17-rc1 +Git-commit: dc94121b5ca17adaaabb7959c10d9c6ea504f7b1 +References: jsc#PED-1368 + +Switch all selftests uses of to-be-deprecated bpf_load_btf() with +equivalent bpf_btf_load() calls. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211209193840.1248570-10-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/map_tests/sk_storage_map.c | 2 + tools/testing/selftests/bpf/prog_tests/btf.c | 50 ++++++++++------- + tools/testing/selftests/bpf/test_verifier.c | 2 + 3 files changed, 32 insertions(+), 22 deletions(-) + +--- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c ++++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c +@@ -136,7 +136,7 @@ static int load_btf(void) + memcpy(raw_btf + sizeof(btf_hdr) + sizeof(btf_raw_types), + btf_str_sec, sizeof(btf_str_sec)); + +- return bpf_load_btf(raw_btf, sizeof(raw_btf), 0, 0, 0); ++ return bpf_btf_load(raw_btf, sizeof(raw_btf), NULL); + } + + static int create_sk_storage_map(void) +--- a/tools/testing/selftests/bpf/prog_tests/btf.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf.c +@@ -4071,6 +4071,28 @@ done: + return raw_btf; + } + ++static int load_raw_btf(const void *raw_data, size_t raw_size) ++{ ++ LIBBPF_OPTS(bpf_btf_load_opts, opts); ++ int btf_fd; ++ ++ if (always_log) { ++ opts.log_buf = btf_log_buf, ++ opts.log_size = BTF_LOG_BUF_SIZE, ++ opts.log_level = 1; ++ } ++ ++ btf_fd = bpf_btf_load(raw_data, raw_size, &opts); ++ if (btf_fd < 0 && !always_log) { ++ opts.log_buf = btf_log_buf, ++ opts.log_size = BTF_LOG_BUF_SIZE, ++ opts.log_level = 1; ++ btf_fd = bpf_btf_load(raw_data, raw_size, &opts); ++ } ++ ++ return btf_fd; ++} ++ + static void do_test_raw(unsigned int test_num) + { + struct btf_raw_test *test = &raw_tests[test_num - 1]; +@@ -4100,16 +4122,14 @@ static void do_test_raw(unsigned int tes + hdr->str_len = (int)hdr->str_len + test->str_len_delta; + + *btf_log_buf = '\0'; +- btf_fd = bpf_load_btf(raw_btf, raw_btf_size, +- btf_log_buf, BTF_LOG_BUF_SIZE, +- always_log); ++ btf_fd = load_raw_btf(raw_btf, raw_btf_size); + free(raw_btf); + + err = ((btf_fd < 0) != test->btf_load_err); + if (CHECK(err, "btf_fd:%d test->btf_load_err:%u", + btf_fd, test->btf_load_err) || + CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), +- "expected err_str:%s", test->err_str)) { ++ "expected err_str:%s\n", test->err_str)) { + err = -1; + goto done; + } +@@ -4227,9 +4247,7 @@ static int test_big_btf_info(unsigned in + goto done; + } + +- btf_fd = bpf_load_btf(raw_btf, 
raw_btf_size, +- btf_log_buf, BTF_LOG_BUF_SIZE, +- always_log); ++ btf_fd = load_raw_btf(raw_btf, raw_btf_size); + if (CHECK(btf_fd < 0, "errno:%d", errno)) { + err = -1; + goto done; +@@ -4315,9 +4333,7 @@ static int test_btf_id(unsigned int test + info[i].btf_size = raw_btf_size; + } + +- btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size, +- btf_log_buf, BTF_LOG_BUF_SIZE, +- always_log); ++ btf_fd[0] = load_raw_btf(raw_btf, raw_btf_size); + if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) { + err = -1; + goto done; +@@ -4447,9 +4463,7 @@ static void do_test_get_info(unsigned in + goto done; + } + +- btf_fd = bpf_load_btf(raw_btf, raw_btf_size, +- btf_log_buf, BTF_LOG_BUF_SIZE, +- always_log); ++ btf_fd = load_raw_btf(raw_btf, raw_btf_size); + if (CHECK(btf_fd <= 0, "errno:%d", errno)) { + err = -1; + goto done; +@@ -5169,12 +5183,10 @@ static void do_test_pprint(int test_num) + return; + + *btf_log_buf = '\0'; +- btf_fd = bpf_load_btf(raw_btf, raw_btf_size, +- btf_log_buf, BTF_LOG_BUF_SIZE, +- always_log); ++ btf_fd = load_raw_btf(raw_btf, raw_btf_size); + free(raw_btf); + +- if (CHECK(btf_fd < 0, "errno:%d", errno)) { ++ if (CHECK(btf_fd < 0, "errno:%d\n", errno)) { + err = -1; + goto done; + } +@@ -6538,9 +6550,7 @@ static void do_test_info_raw(unsigned in + return; + + *btf_log_buf = '\0'; +- btf_fd = bpf_load_btf(raw_btf, raw_btf_size, +- btf_log_buf, BTF_LOG_BUF_SIZE, +- always_log); ++ btf_fd = load_raw_btf(raw_btf, raw_btf_size); + free(raw_btf); + + if (CHECK(btf_fd < 0, "invalid btf_fd errno:%d", errno)) { +--- a/tools/testing/selftests/bpf/test_verifier.c ++++ b/tools/testing/selftests/bpf/test_verifier.c +@@ -650,7 +650,7 @@ static int load_btf(void) + memcpy(ptr, btf_str_sec, hdr.str_len); + ptr += hdr.str_len; + +- btf_fd = bpf_load_btf(raw_btf, ptr - raw_btf, 0, 0, 0); ++ btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, NULL); + free(raw_btf); + if (btf_fd < 0) + return -1; diff --git a/patches.suse/selftests-bpf-Revert-CO-RE-removal-in-test_ksyms_wea.patch b/patches.suse/selftests-bpf-Revert-CO-RE-removal-in-test_ksyms_wea.patch new file mode 100644 index 0000000..0d355ea --- /dev/null +++ b/patches.suse/selftests-bpf-Revert-CO-RE-removal-in-test_ksyms_wea.patch @@ -0,0 +1,31 @@ +From: Alexei Starovoitov +Date: Wed, 1 Dec 2021 10:10:39 -0800 +Subject: selftests/bpf: Revert CO-RE removal in test_ksyms_weak. +Patch-mainline: v5.17-rc1 +Git-commit: 3268f0316af629474ec4fa8d9b4e6f618cb96794 +References: jsc#PED-1368 + +The commit 087cba799ced ("selftests/bpf: Add weak/typeless ksym test for light skeleton") +added test_ksyms_weak to light skeleton testing, but remove CO-RE access. +Revert that part of commit, since light skeleton can use CO-RE in the kernel. + +Signed-off-by: Alexei Starovoitov +Signed-off-by: Andrii Nakryiko +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201181040.23337-17-alexei.starovoitov@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/progs/test_ksyms_weak.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c ++++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c +@@ -38,7 +38,7 @@ int pass_handler(const void *ctx) + /* tests existing symbols. */ + rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0); + if (rq) +- out__existing_typed = 0; ++ out__existing_typed = rq->cpu; + out__existing_typeless = (__u64)&bpf_prog_active; + + /* tests non-existent symbols. 
*/ diff --git a/patches.suse/selftests-bpf-Stop-using-bpf_object__find_program_by.patch b/patches.suse/selftests-bpf-Stop-using-bpf_object__find_program_by.patch new file mode 100644 index 0000000..be3a4fe --- /dev/null +++ b/patches.suse/selftests-bpf-Stop-using-bpf_object__find_program_by.patch @@ -0,0 +1,543 @@ +From: Kui-Feng Lee +Date: Mon, 13 Dec 2021 19:59:28 -0800 +Subject: selftests/bpf: Stop using bpf_object__find_program_by_title API. +Patch-mainline: v5.17-rc1 +Git-commit: a393ea80a22a9beffdc9a527bd2f9f270e7a0c6e +References: jsc#PED-1368 + +bpf_object__find_program_by_title is going to be deprecated. Replace +all use cases in tools/testing/selftests/bpf with +bpf_object__find_program_by_name or bpf_object__for_each_program. + +Signed-off-by: Kui-Feng Lee +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211214035931.1148209-2-kuifeng@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c | 4 + tools/testing/selftests/bpf/prog_tests/connect_force_port.c | 18 +- + tools/testing/selftests/bpf/prog_tests/core_reloc.c | 79 ++++++---- + tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c | 17 +- + tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c | 4 + tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c | 15 + + tools/testing/selftests/bpf/prog_tests/stacktrace_map.c | 4 + tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c | 4 + tools/testing/selftests/bpf/prog_tests/test_overhead.c | 20 +- + tools/testing/selftests/bpf/prog_tests/trampoline_count.c | 6 + 10 files changed, 104 insertions(+), 67 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c ++++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +@@ -65,8 +65,8 @@ void serial_test_bpf_obj_id(void) + if (CHECK_FAIL(err)) + goto done; + +- prog = bpf_object__find_program_by_title(objs[i], +- "raw_tp/sys_enter"); ++ prog = bpf_object__find_program_by_name(objs[i], ++ "test_obj_id"); + if (CHECK_FAIL(!prog)) + goto done; + links[i] = bpf_program__attach(prog); +--- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c ++++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c +@@ -67,9 +67,9 @@ static int run_test(int cgroup_fd, int s + goto close_bpf_object; + } + +- prog = bpf_object__find_program_by_title(obj, v4 ? +- "cgroup/connect4" : +- "cgroup/connect6"); ++ prog = bpf_object__find_program_by_name(obj, v4 ? ++ "connect4" : ++ "connect6"); + if (CHECK(!prog, "find_prog", "connect prog not found\n")) { + err = -EIO; + goto close_bpf_object; +@@ -83,9 +83,9 @@ static int run_test(int cgroup_fd, int s + goto close_bpf_object; + } + +- prog = bpf_object__find_program_by_title(obj, v4 ? +- "cgroup/getpeername4" : +- "cgroup/getpeername6"); ++ prog = bpf_object__find_program_by_name(obj, v4 ? ++ "getpeername4" : ++ "getpeername6"); + if (CHECK(!prog, "find_prog", "getpeername prog not found\n")) { + err = -EIO; + goto close_bpf_object; +@@ -99,9 +99,9 @@ static int run_test(int cgroup_fd, int s + goto close_bpf_object; + } + +- prog = bpf_object__find_program_by_title(obj, v4 ? +- "cgroup/getsockname4" : +- "cgroup/getsockname6"); ++ prog = bpf_object__find_program_by_name(obj, v4 ? 
++ "getsockname4" : ++ "getsockname6"); + if (CHECK(!prog, "find_prog", "getsockname prog not found\n")) { + err = -EIO; + goto close_bpf_object; +--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c ++++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c +@@ -10,7 +10,7 @@ static int duration = 0; + + #define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name) + +-#define MODULES_CASE(name, sec_name, tp_name) { \ ++#define MODULES_CASE(name, pg_name, tp_name) { \ + .case_name = name, \ + .bpf_obj_file = "test_core_reloc_module.o", \ + .btf_src_file = NULL, /* find in kernel module BTFs */ \ +@@ -28,7 +28,7 @@ static int duration = 0; + .comm_len = sizeof("test_progs"), \ + }, \ + .output_len = sizeof(struct core_reloc_module_output), \ +- .prog_sec_name = sec_name, \ ++ .prog_name = pg_name, \ + .raw_tp_name = tp_name, \ + .trigger = __trigger_module_test_read, \ + .needs_testmod = true, \ +@@ -43,7 +43,9 @@ static int duration = 0; + #define FLAVORS_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_flavors.o", \ +- .btf_src_file = "btf__core_reloc_" #name ".o" \ ++ .btf_src_file = "btf__core_reloc_" #name ".o", \ ++ .raw_tp_name = "sys_enter", \ ++ .prog_name = "test_core_flavors" \ + + #define FLAVORS_CASE(name) { \ + FLAVORS_CASE_COMMON(name), \ +@@ -66,7 +68,9 @@ static int duration = 0; + #define NESTING_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_nesting.o", \ +- .btf_src_file = "btf__core_reloc_" #name ".o" ++ .btf_src_file = "btf__core_reloc_" #name ".o", \ ++ .raw_tp_name = "sys_enter", \ ++ .prog_name = "test_core_nesting" \ + + #define NESTING_CASE(name) { \ + NESTING_CASE_COMMON(name), \ +@@ -91,7 +95,9 @@ static int duration = 0; + #define ARRAYS_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_arrays.o", \ +- .btf_src_file = "btf__core_reloc_" #name ".o" ++ .btf_src_file = "btf__core_reloc_" #name ".o", \ ++ .raw_tp_name = "sys_enter", \ ++ .prog_name = "test_core_arrays" \ + + #define ARRAYS_CASE(name) { \ + ARRAYS_CASE_COMMON(name), \ +@@ -123,7 +129,9 @@ static int duration = 0; + #define PRIMITIVES_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_primitives.o", \ +- .btf_src_file = "btf__core_reloc_" #name ".o" ++ .btf_src_file = "btf__core_reloc_" #name ".o", \ ++ .raw_tp_name = "sys_enter", \ ++ .prog_name = "test_core_primitives" \ + + #define PRIMITIVES_CASE(name) { \ + PRIMITIVES_CASE_COMMON(name), \ +@@ -158,6 +166,8 @@ static int duration = 0; + .e = 5, .f = 6, .g = 7, .h = 8, \ + }, \ + .output_len = sizeof(struct core_reloc_mods_output), \ ++ .raw_tp_name = "sys_enter", \ ++ .prog_name = "test_core_mods", \ + } + + #define PTR_AS_ARR_CASE(name) { \ +@@ -174,6 +184,8 @@ static int duration = 0; + .a = 3, \ + }, \ + .output_len = sizeof(struct core_reloc_ptr_as_arr), \ ++ .raw_tp_name = "sys_enter", \ ++ .prog_name = "test_core_ptr_as_arr", \ + } + + #define INTS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ +@@ -190,7 +202,9 @@ static int duration = 0; + #define INTS_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_ints.o", \ +- .btf_src_file = "btf__core_reloc_" #name ".o" ++ .btf_src_file = "btf__core_reloc_" #name ".o", \ ++ .raw_tp_name = "sys_enter", \ ++ .prog_name = "test_core_ints" + + #define INTS_CASE(name) { \ + INTS_CASE_COMMON(name), \ +@@ -208,7 +222,9 @@ static int duration = 0; + #define FIELD_EXISTS_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = 
"test_core_reloc_existence.o", \ +- .btf_src_file = "btf__core_reloc_" #name ".o" \ ++ .btf_src_file = "btf__core_reloc_" #name ".o", \ ++ .raw_tp_name = "sys_enter", \ ++ .prog_name = "test_core_existence" + + #define BITFIELDS_CASE_COMMON(objfile, test_name_prefix, name) \ + .case_name = test_name_prefix#name, \ +@@ -223,6 +239,8 @@ static int duration = 0; + .output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_bitfields_output), \ ++ .raw_tp_name = "sys_enter", \ ++ .prog_name = "test_core_bitfields", \ + }, { \ + BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ + "direct:", name), \ +@@ -231,7 +249,7 @@ static int duration = 0; + .output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_bitfields_output), \ +- .prog_sec_name = "tp_btf/sys_enter", \ ++ .prog_name = "test_core_bitfields_direct", \ + } + + +@@ -239,17 +257,21 @@ static int duration = 0; + BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \ + "probed:", name), \ + .fails = true, \ ++ .raw_tp_name = "sys_enter", \ ++ .prog_name = "test_core_bitfields", \ + }, { \ + BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ + "direct:", name), \ +- .prog_sec_name = "tp_btf/sys_enter", \ + .fails = true, \ ++ .prog_name = "test_core_bitfields_direct", \ + } + + #define SIZE_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_size.o", \ +- .btf_src_file = "btf__core_reloc_" #name ".o" ++ .btf_src_file = "btf__core_reloc_" #name ".o", \ ++ .raw_tp_name = "sys_enter", \ ++ .prog_name = "test_core_size" + + #define SIZE_OUTPUT_DATA(type) \ + STRUCT_TO_CHAR_PTR(core_reloc_size_output) { \ +@@ -277,8 +299,10 @@ static int duration = 0; + + #define TYPE_BASED_CASE_COMMON(name) \ + .case_name = #name, \ +- .bpf_obj_file = "test_core_reloc_type_based.o", \ +- .btf_src_file = "btf__core_reloc_" #name ".o" \ ++ .bpf_obj_file = "test_core_reloc_type_based.o", \ ++ .btf_src_file = "btf__core_reloc_" #name ".o", \ ++ .raw_tp_name = "sys_enter", \ ++ .prog_name = "test_core_type_based" + + #define TYPE_BASED_CASE(name, ...) { \ + TYPE_BASED_CASE_COMMON(name), \ +@@ -295,7 +319,9 @@ static int duration = 0; + #define TYPE_ID_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_type_id.o", \ +- .btf_src_file = "btf__core_reloc_" #name ".o" \ ++ .btf_src_file = "btf__core_reloc_" #name ".o", \ ++ .raw_tp_name = "sys_enter", \ ++ .prog_name = "test_core_type_id" + + #define TYPE_ID_CASE(name, setup_fn) { \ + TYPE_ID_CASE_COMMON(name), \ +@@ -312,7 +338,9 @@ static int duration = 0; + #define ENUMVAL_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_enumval.o", \ +- .btf_src_file = "btf__core_reloc_" #name ".o" \ ++ .btf_src_file = "btf__core_reloc_" #name ".o", \ ++ .raw_tp_name = "sys_enter", \ ++ .prog_name = "test_core_enumval" + + #define ENUMVAL_CASE(name, ...) 
{ \ + ENUMVAL_CASE_COMMON(name), \ +@@ -342,7 +370,7 @@ struct core_reloc_test_case { + bool fails; + bool needs_testmod; + bool relaxed_core_relocs; +- const char *prog_sec_name; ++ const char *prog_name; + const char *raw_tp_name; + setup_test_fn setup; + trigger_test_fn trigger; +@@ -497,11 +525,13 @@ static struct core_reloc_test_case test_ + .comm_len = sizeof("test_progs"), + }, + .output_len = sizeof(struct core_reloc_kernel_output), ++ .raw_tp_name = "sys_enter", ++ .prog_name = "test_core_kernel", + }, + + /* validate we can find kernel module BTF types for relocs/attach */ +- MODULES_CASE("module_probed", "raw_tp/bpf_testmod_test_read", "bpf_testmod_test_read"), +- MODULES_CASE("module_direct", "tp_btf/bpf_testmod_test_read", NULL), ++ MODULES_CASE("module_probed", "test_core_module_probed", "bpf_testmod_test_read"), ++ MODULES_CASE("module_direct", "test_core_module_direct", NULL), + + /* validate BPF program can use multiple flavors to match against + * single target BTF type +@@ -580,6 +610,8 @@ static struct core_reloc_test_case test_ + .c = 0, /* BUG in clang, should be 3 */ + }, + .output_len = sizeof(struct core_reloc_misc_output), ++ .raw_tp_name = "sys_enter", ++ .prog_name = "test_core_misc", + }, + + /* validate field existence checks */ +@@ -848,14 +880,9 @@ void test_core_reloc(void) + if (!ASSERT_OK_PTR(obj, "obj_open")) + goto cleanup; + +- probe_name = "raw_tracepoint/sys_enter"; +- tp_name = "sys_enter"; +- if (test_case->prog_sec_name) { +- probe_name = test_case->prog_sec_name; +- tp_name = test_case->raw_tp_name; /* NULL for tp_btf */ +- } +- +- prog = bpf_object__find_program_by_title(obj, probe_name); ++ probe_name = test_case->prog_name; ++ tp_name = test_case->raw_tp_name; /* NULL for tp_btf */ ++ prog = bpf_object__find_program_by_name(obj, probe_name); + if (CHECK(!prog, "find_probe", + "prog '%s' not found\n", probe_name)) + goto cleanup; +--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c ++++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +@@ -101,6 +101,8 @@ static void test_fexit_bpf2bpf_common(co + + for (i = 0; i < prog_cnt; i++) { + struct bpf_link_info link_info; ++ struct bpf_program *pos; ++ const char *pos_sec_name; + char *tgt_name; + __s32 btf_id; + +@@ -109,7 +111,14 @@ static void test_fexit_bpf2bpf_common(co + goto close_prog; + btf_id = btf__find_by_name_kind(btf, tgt_name + 1, BTF_KIND_FUNC); + +- prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]); ++ prog[i] = NULL; ++ bpf_object__for_each_program(pos, obj) { ++ pos_sec_name = bpf_program__section_name(pos); ++ if (pos_sec_name && !strcmp(pos_sec_name, prog_name[i])) { ++ prog[i] = pos; ++ break; ++ } ++ } + if (!ASSERT_OK_PTR(prog[i], prog_name[i])) + goto close_prog; + +@@ -211,8 +220,8 @@ static void test_func_replace_verify(voi + + static int test_second_attach(struct bpf_object *obj) + { +- const char *prog_name = "freplace/get_constant"; +- const char *tgt_name = prog_name + 9; /* cut off freplace/ */ ++ const char *prog_name = "security_new_get_constant"; ++ const char *tgt_name = "get_constant"; + const char *tgt_obj_file = "./test_pkt_access.o"; + struct bpf_program *prog = NULL; + struct bpf_object *tgt_obj; +@@ -220,7 +229,7 @@ static int test_second_attach(struct bpf + struct bpf_link *link; + int err = 0, tgt_fd; + +- prog = bpf_object__find_program_by_title(obj, prog_name); ++ prog = bpf_object__find_program_by_name(obj, prog_name); + if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name)) + return -ENOENT; + +--- 
a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c ++++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c +@@ -89,7 +89,7 @@ void test_get_stack_raw_tp(void) + { + const char *file = "./test_get_stack_rawtp.o"; + const char *file_err = "./test_get_stack_rawtp_err.o"; +- const char *prog_name = "raw_tracepoint/sys_enter"; ++ const char *prog_name = "bpf_prog1"; + int i, err, prog_fd, exp_cnt = MAX_CNT_RAWTP; + struct perf_buffer *pb = NULL; + struct bpf_link *link = NULL; +@@ -107,7 +107,7 @@ void test_get_stack_raw_tp(void) + if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) + return; + +- prog = bpf_object__find_program_by_title(obj, prog_name); ++ prog = bpf_object__find_program_by_name(obj, prog_name); + if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name)) + goto close_prog; + +--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c ++++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c +@@ -136,7 +136,8 @@ static int start_server(void) + return fd; + } + +-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title) ++static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title, ++ const char *prog_name) + { + enum bpf_attach_type attach_type; + enum bpf_prog_type prog_type; +@@ -145,20 +146,20 @@ static int prog_attach(struct bpf_object + + err = libbpf_prog_type_by_name(title, &prog_type, &attach_type); + if (err) { +- log_err("Failed to deduct types for %s BPF program", title); ++ log_err("Failed to deduct types for %s BPF program", prog_name); + return -1; + } + +- prog = bpf_object__find_program_by_title(obj, title); ++ prog = bpf_object__find_program_by_name(obj, prog_name); + if (!prog) { +- log_err("Failed to find %s BPF program", title); ++ log_err("Failed to find %s BPF program", prog_name); + return -1; + } + + err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, + attach_type, 0); + if (err) { +- log_err("Failed to attach %s BPF program", title); ++ log_err("Failed to attach %s BPF program", prog_name); + return -1; + } + +@@ -181,11 +182,11 @@ static void run_test(int cgroup_fd) + if (!ASSERT_OK(err, "obj_load")) + goto close_bpf_object; + +- err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt"); ++ err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt", "_getsockopt"); + if (CHECK_FAIL(err)) + goto close_bpf_object; + +- err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt"); ++ err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt", "_setsockopt"); + if (CHECK_FAIL(err)) + goto close_bpf_object; + +--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c ++++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c +@@ -4,7 +4,7 @@ + void test_stacktrace_map(void) + { + int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; +- const char *prog_name = "tracepoint/sched/sched_switch"; ++ const char *prog_name = "oncpu"; + int err, prog_fd, stack_trace_len; + const char *file = "./test_stacktrace_map.o"; + __u32 key, val, duration = 0; +@@ -16,7 +16,7 @@ void test_stacktrace_map(void) + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + return; + +- prog = bpf_object__find_program_by_title(obj, prog_name); ++ prog = bpf_object__find_program_by_name(obj, prog_name); + if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name)) + goto close_prog; + +--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c ++++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c +@@ -3,7 +3,7 @@ + + void 
test_stacktrace_map_raw_tp(void) + { +- const char *prog_name = "tracepoint/sched/sched_switch"; ++ const char *prog_name = "oncpu"; + int control_map_fd, stackid_hmap_fd, stackmap_fd; + const char *file = "./test_stacktrace_map.o"; + __u32 key, val, duration = 0; +@@ -16,7 +16,7 @@ void test_stacktrace_map_raw_tp(void) + if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) + return; + +- prog = bpf_object__find_program_by_title(obj, prog_name); ++ prog = bpf_object__find_program_by_name(obj, prog_name); + if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name)) + goto close_prog; + +--- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c ++++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c +@@ -56,11 +56,11 @@ static void setaffinity(void) + + void test_test_overhead(void) + { +- const char *kprobe_name = "kprobe/__set_task_comm"; +- const char *kretprobe_name = "kretprobe/__set_task_comm"; +- const char *raw_tp_name = "raw_tp/task_rename"; +- const char *fentry_name = "fentry/__set_task_comm"; +- const char *fexit_name = "fexit/__set_task_comm"; ++ const char *kprobe_name = "prog1"; ++ const char *kretprobe_name = "prog2"; ++ const char *raw_tp_name = "prog3"; ++ const char *fentry_name = "prog4"; ++ const char *fexit_name = "prog5"; + const char *kprobe_func = "__set_task_comm"; + struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog; + struct bpf_program *fentry_prog, *fexit_prog; +@@ -76,23 +76,23 @@ void test_test_overhead(void) + if (!ASSERT_OK_PTR(obj, "obj_open_file")) + return; + +- kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name); ++ kprobe_prog = bpf_object__find_program_by_name(obj, kprobe_name); + if (CHECK(!kprobe_prog, "find_probe", + "prog '%s' not found\n", kprobe_name)) + goto cleanup; +- kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name); ++ kretprobe_prog = bpf_object__find_program_by_name(obj, kretprobe_name); + if (CHECK(!kretprobe_prog, "find_probe", + "prog '%s' not found\n", kretprobe_name)) + goto cleanup; +- raw_tp_prog = bpf_object__find_program_by_title(obj, raw_tp_name); ++ raw_tp_prog = bpf_object__find_program_by_name(obj, raw_tp_name); + if (CHECK(!raw_tp_prog, "find_probe", + "prog '%s' not found\n", raw_tp_name)) + goto cleanup; +- fentry_prog = bpf_object__find_program_by_title(obj, fentry_name); ++ fentry_prog = bpf_object__find_program_by_name(obj, fentry_name); + if (CHECK(!fentry_prog, "find_probe", + "prog '%s' not found\n", fentry_name)) + goto cleanup; +- fexit_prog = bpf_object__find_program_by_title(obj, fexit_name); ++ fexit_prog = bpf_object__find_program_by_name(obj, fexit_name); + if (CHECK(!fexit_prog, "find_probe", + "prog '%s' not found\n", fexit_name)) + goto cleanup; +--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c ++++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +@@ -35,7 +35,7 @@ static struct bpf_link *load(struct bpf_ + struct bpf_program *prog; + int duration = 0; + +- prog = bpf_object__find_program_by_title(obj, name); ++ prog = bpf_object__find_program_by_name(obj, name); + if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name)) + return ERR_PTR(-EINVAL); + return bpf_program__attach_trace(prog); +@@ -44,8 +44,8 @@ static struct bpf_link *load(struct bpf_ + /* TODO: use different target function to run in concurrent mode */ + void serial_test_trampoline_count(void) + { +- const char *fentry_name = "fentry/__set_task_comm"; +- const char *fexit_name = "fexit/__set_task_comm"; ++ const char *fentry_name = 
"prog1"; ++ const char *fexit_name = "prog2"; + const char *object = "test_trampoline_count.o"; + struct inst inst[MAX_TRAMP_PROGS] = {}; + int err, i = 0, duration = 0; diff --git a/patches.suse/selftests-bpf-Test-BPF_MAP_TYPE_PROG_ARRAY-static-in.patch b/patches.suse/selftests-bpf-Test-BPF_MAP_TYPE_PROG_ARRAY-static-in.patch new file mode 100644 index 0000000..2cfc8bf --- /dev/null +++ b/patches.suse/selftests-bpf-Test-BPF_MAP_TYPE_PROG_ARRAY-static-in.patch @@ -0,0 +1,97 @@ +From: Hengqi Chen +Date: Sun, 28 Nov 2021 22:16:33 +0800 +Subject: selftests/bpf: Test BPF_MAP_TYPE_PROG_ARRAY static initialization +Patch-mainline: v5.17-rc1 +Git-commit: baeead213e67a9554d589a2845c634b8e473d107 +References: jsc#PED-1368 + +Add testcase for BPF_MAP_TYPE_PROG_ARRAY static initialization. + +Signed-off-by: Hengqi Chen +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211128141633.502339-3-hengqi.chen@gmail.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/prog_array_init.c | 32 ++++++++++++ + tools/testing/selftests/bpf/progs/test_prog_array_init.c | 39 +++++++++++++++ + 2 files changed, 71 insertions(+) + create mode 100644 tools/testing/selftests/bpf/prog_tests/prog_array_init.c + create mode 100644 tools/testing/selftests/bpf/progs/test_prog_array_init.c + +--- /dev/null ++++ b/tools/testing/selftests/bpf/prog_tests/prog_array_init.c +@@ -0,0 +1,32 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* Copyright (c) 2021 Hengqi Chen */ ++ ++#include ++#include "test_prog_array_init.skel.h" ++ ++void test_prog_array_init(void) ++{ ++ struct test_prog_array_init *skel; ++ int err; ++ ++ skel = test_prog_array_init__open(); ++ if (!ASSERT_OK_PTR(skel, "could not open BPF object")) ++ return; ++ ++ skel->rodata->my_pid = getpid(); ++ ++ err = test_prog_array_init__load(skel); ++ if (!ASSERT_OK(err, "could not load BPF object")) ++ goto cleanup; ++ ++ skel->links.entry = bpf_program__attach_raw_tracepoint(skel->progs.entry, "sys_enter"); ++ if (!ASSERT_OK_PTR(skel->links.entry, "could not attach BPF program")) ++ goto cleanup; ++ ++ usleep(1); ++ ++ ASSERT_EQ(skel->bss->value, 42, "unexpected value"); ++ ++cleanup: ++ test_prog_array_init__destroy(skel); ++} +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/test_prog_array_init.c +@@ -0,0 +1,39 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* Copyright (c) 2021 Hengqi Chen */ ++ ++#include "vmlinux.h" ++#include ++#include ++ ++const volatile pid_t my_pid = 0; ++int value = 0; ++ ++SEC("raw_tp/sys_enter") ++int tailcall_1(void *ctx) ++{ ++ value = 42; ++ return 0; ++} ++ ++struct { ++ __uint(type, BPF_MAP_TYPE_PROG_ARRAY); ++ __uint(max_entries, 2); ++ __uint(key_size, sizeof(__u32)); ++ __array(values, int (void *)); ++} prog_array_init SEC(".maps") = { ++ .values = { ++ [1] = (void *)&tailcall_1, ++ }, ++}; ++ ++SEC("raw_tp/sys_enter") ++int entry(void *ctx) ++{ ++ pid_t pid = bpf_get_current_pid_tgid() >> 32; ++ ++ if (pid != my_pid) ++ return 0; ++ ++ bpf_tail_call(ctx, &prog_array_init, 1); ++ return 0; ++} diff --git a/patches.suse/selftests-bpf-Test-BTF_KIND_DECL_TAG-for-deduplicati.patch b/patches.suse/selftests-bpf-Test-BTF_KIND_DECL_TAG-for-deduplicati.patch new file mode 100644 index 0000000..8fa98e2 --- /dev/null +++ b/patches.suse/selftests-bpf-Test-BTF_KIND_DECL_TAG-for-deduplicati.patch @@ -0,0 +1,193 @@ +From: Yonghong Song +Date: Thu, 11 Nov 2021 17:26:35 -0800 +Subject: selftests/bpf: Test BTF_KIND_DECL_TAG for deduplication +Patch-mainline: v5.17-rc1 +Git-commit: 
846f4826d18e660ab668eb26e83c6adf0ceb24d2 +References: jsc#PED-1368 + +Add BTF_KIND_TYPE_TAG duplication unit tests. + +Signed-off-by: Yonghong Song +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211112012635.1506853-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/btf.c | 139 ++++++++++++++++++++++++++- + 1 file changed, 135 insertions(+), 4 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/btf.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf.c +@@ -6878,15 +6878,16 @@ static struct btf_dedup_test dedup_tests + BTF_RESTRICT_ENC(8), /* [11] restrict */ + BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), +- BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), ++ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 18), + BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ + BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ ++ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */ + BTF_END_RAW, + }, +- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q"), ++ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"), + }, + .expect = { + .raw_types = { +@@ -6907,15 +6908,16 @@ static struct btf_dedup_test dedup_tests + BTF_RESTRICT_ENC(8), /* [11] restrict */ + BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), +- BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), ++ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 18), + BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ + BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ ++ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */ + BTF_END_RAW, + }, +- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q"), ++ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"), + }, + }, + { +@@ -7221,6 +7223,135 @@ static struct btf_dedup_test dedup_tests + BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"), + }, + }, ++{ ++ .descr = "dedup: btf_type_tag #1", ++ .input = { ++ .raw_types = { ++ /* ptr -> tag2 -> tag1 -> int */ ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ ++ BTF_PTR_ENC(3), /* [4] */ ++ /* ptr -> tag2 -> tag1 -> int */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [5] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(2), 5), /* [6] */ ++ BTF_PTR_ENC(6), /* [7] */ ++ /* ptr -> tag1 -> int */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [8] */ ++ BTF_PTR_ENC(8), /* [9] */ ++ BTF_END_RAW, ++ }, ++ BTF_STR_SEC("\0tag1\0tag2"), ++ }, ++ .expect = { ++ .raw_types = { ++ /* ptr -> tag2 -> tag1 -> int */ ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ ++ BTF_PTR_ENC(3), /* [4] */ ++ /* ptr -> tag1 -> int */ ++ BTF_PTR_ENC(2), /* [5] */ ++ BTF_END_RAW, ++ }, ++ BTF_STR_SEC("\0tag1\0tag2"), ++ }, ++}, ++{ ++ .descr = "dedup: btf_type_tag #2", ++ .input = { ++ .raw_types = { ++ /* ptr -> tag2 -> tag1 -> int */ ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ ++ BTF_PTR_ENC(3), /* [4] */ ++ /* ptr -> tag2 -> int */ ++ 
BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */ ++ BTF_PTR_ENC(5), /* [6] */ ++ BTF_END_RAW, ++ }, ++ BTF_STR_SEC("\0tag1\0tag2"), ++ }, ++ .expect = { ++ .raw_types = { ++ /* ptr -> tag2 -> tag1 -> int */ ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ ++ BTF_PTR_ENC(3), /* [4] */ ++ /* ptr -> tag2 -> int */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */ ++ BTF_PTR_ENC(5), /* [6] */ ++ BTF_END_RAW, ++ }, ++ BTF_STR_SEC("\0tag1\0tag2"), ++ }, ++}, ++{ ++ .descr = "dedup: btf_type_tag #3", ++ .input = { ++ .raw_types = { ++ /* ptr -> tag2 -> tag1 -> int */ ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ ++ BTF_PTR_ENC(3), /* [4] */ ++ /* ptr -> tag1 -> tag2 -> int */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 5), /* [6] */ ++ BTF_PTR_ENC(6), /* [7] */ ++ BTF_END_RAW, ++ }, ++ BTF_STR_SEC("\0tag1\0tag2"), ++ }, ++ .expect = { ++ .raw_types = { ++ /* ptr -> tag2 -> tag1 -> int */ ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ ++ BTF_PTR_ENC(3), /* [4] */ ++ /* ptr -> tag1 -> tag2 -> int */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 5), /* [6] */ ++ BTF_PTR_ENC(6), /* [7] */ ++ BTF_END_RAW, ++ }, ++ BTF_STR_SEC("\0tag1\0tag2"), ++ }, ++}, ++{ ++ .descr = "dedup: btf_type_tag #4", ++ .input = { ++ .raw_types = { ++ /* ptr -> tag1 -> int */ ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ ++ BTF_PTR_ENC(2), /* [3] */ ++ /* ptr -> tag1 -> long */ ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [4] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 4), /* [5] */ ++ BTF_PTR_ENC(5), /* [6] */ ++ BTF_END_RAW, ++ }, ++ BTF_STR_SEC("\0tag1"), ++ }, ++ .expect = { ++ .raw_types = { ++ /* ptr -> tag1 -> int */ ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ ++ BTF_PTR_ENC(2), /* [3] */ ++ /* ptr -> tag1 -> long */ ++ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [4] */ ++ BTF_TYPE_TAG_ENC(NAME_NTH(1), 4), /* [5] */ ++ BTF_PTR_ENC(5), /* [6] */ ++ BTF_END_RAW, ++ }, ++ BTF_STR_SEC("\0tag1"), ++ }, ++}, + + }; + diff --git a/patches.suse/selftests-bpf-Test-RENAME_EXCHANGE-and-RENAME_NOREPL.patch b/patches.suse/selftests-bpf-Test-RENAME_EXCHANGE-and-RENAME_NOREPL.patch new file mode 100644 index 0000000..7dcbb3b --- /dev/null +++ b/patches.suse/selftests-bpf-Test-RENAME_EXCHANGE-and-RENAME_NOREPL.patch @@ -0,0 +1,109 @@ +From: Lorenz Bauer +Date: Thu, 28 Oct 2021 10:47:24 +0100 +Subject: selftests/bpf: Test RENAME_EXCHANGE and RENAME_NOREPLACE on bpffs +Patch-mainline: v5.16-rc1 +Git-commit: 7e5ad817ec297f91a2fa5c423a39a458a4701bca +References: jsc#PED-1368 + +Add tests to exercise the behaviour of RENAME_EXCHANGE and RENAME_NOREPLACE +on bpffs. The former checks that after an exchange the inode of two +directories has changed. The latter checks that the source still exists +after a failed rename. Generally, having support for renameat2(RENAME_EXCHANGE) +in bpffs fixes atomic upgrades of our sk_lookup control plane. 
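For reference, the semantics under test reduce to two renameat2() calls; below is a minimal userspace sketch, assuming two already-pinned bpffs entries (the paths /sys/fs/bpf/a and /sys/fs/bpf/b are hypothetical stand-ins) and glibc 2.28 or newer, which exposes renameat2() and the RENAME_* flags through <stdio.h> under _GNU_SOURCE:

#define _GNU_SOURCE
#include <stdio.h>

int main(void)
{
	/* Atomically swap the two pinned entries; both paths must exist.
	 * The dirfd arguments are ignored for absolute paths, matching
	 * the renameat2(0, ...) calls in the test below. */
	if (renameat2(0, "/sys/fs/bpf/a", 0, "/sys/fs/bpf/b", RENAME_EXCHANGE))
		perror("RENAME_EXCHANGE");

	/* Refuse to clobber: fails with EEXIST when the target exists,
	 * leaving the source in place. */
	if (renameat2(0, "/sys/fs/bpf/a", 0, "/sys/fs/bpf/b", RENAME_NOREPLACE))
		perror("RENAME_NOREPLACE");

	return 0;
}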
+ +Signed-off-by: Lorenz Bauer +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211028094724.59043-5-lmb@cloudflare.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/test_bpffs.c | 65 +++++++++++++++++++- + 1 file changed, 64 insertions(+), 1 deletion(-) + +--- a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c ++++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 + /* Copyright (c) 2020 Facebook */ + #define _GNU_SOURCE ++#include + #include + #include + #include +@@ -29,7 +30,8 @@ static int read_iter(char *file) + + static int fn(void) + { +- int err; ++ struct stat a, b, c; ++ int err, map; + + err = unshare(CLONE_NEWNS); + if (!ASSERT_OK(err, "unshare")) +@@ -67,6 +69,67 @@ static int fn(void) + err = read_iter(TDIR "/fs2/progs.debug"); + if (!ASSERT_OK(err, "reading " TDIR "/fs2/progs.debug")) + goto out; ++ ++ err = mkdir(TDIR "/fs1/a", 0777); ++ if (!ASSERT_OK(err, "creating " TDIR "/fs1/a")) ++ goto out; ++ err = mkdir(TDIR "/fs1/a/1", 0777); ++ if (!ASSERT_OK(err, "creating " TDIR "/fs1/a/1")) ++ goto out; ++ err = mkdir(TDIR "/fs1/b", 0777); ++ if (!ASSERT_OK(err, "creating " TDIR "/fs1/b")) ++ goto out; ++ ++ map = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 4, 1, 0); ++ if (!ASSERT_GT(map, 0, "create_map(ARRAY)")) ++ goto out; ++ err = bpf_obj_pin(map, TDIR "/fs1/c"); ++ if (!ASSERT_OK(err, "pin map")) ++ goto out; ++ close(map); ++ ++ /* Check that RENAME_EXCHANGE works for directories. */ ++ err = stat(TDIR "/fs1/a", &a); ++ if (!ASSERT_OK(err, "stat(" TDIR "/fs1/a)")) ++ goto out; ++ err = renameat2(0, TDIR "/fs1/a", 0, TDIR "/fs1/b", RENAME_EXCHANGE); ++ if (!ASSERT_OK(err, "renameat2(/fs1/a, /fs1/b, RENAME_EXCHANGE)")) ++ goto out; ++ err = stat(TDIR "/fs1/b", &b); ++ if (!ASSERT_OK(err, "stat(" TDIR "/fs1/b)")) ++ goto out; ++ if (!ASSERT_EQ(a.st_ino, b.st_ino, "b should have a's inode")) ++ goto out; ++ err = access(TDIR "/fs1/b/1", F_OK); ++ if (!ASSERT_OK(err, "access(" TDIR "/fs1/b/1)")) ++ goto out; ++ ++ /* Check that RENAME_EXCHANGE works for mixed file types. */ ++ err = stat(TDIR "/fs1/c", &c); ++ if (!ASSERT_OK(err, "stat(" TDIR "/fs1/map)")) ++ goto out; ++ err = renameat2(0, TDIR "/fs1/c", 0, TDIR "/fs1/b", RENAME_EXCHANGE); ++ if (!ASSERT_OK(err, "renameat2(/fs1/c, /fs1/b, RENAME_EXCHANGE)")) ++ goto out; ++ err = stat(TDIR "/fs1/b", &b); ++ if (!ASSERT_OK(err, "stat(" TDIR "/fs1/b)")) ++ goto out; ++ if (!ASSERT_EQ(c.st_ino, b.st_ino, "b should have c's inode")) ++ goto out; ++ err = access(TDIR "/fs1/c/1", F_OK); ++ if (!ASSERT_OK(err, "access(" TDIR "/fs1/c/1)")) ++ goto out; ++ ++ /* Check that RENAME_NOREPLACE works. 
*/ ++ err = renameat2(0, TDIR "/fs1/b", 0, TDIR "/fs1/a", RENAME_NOREPLACE); ++ if (!ASSERT_ERR(err, "renameat2(RENAME_NOREPLACE)")) { ++ err = -EINVAL; ++ goto out; ++ } ++ err = access(TDIR "/fs1/b", F_OK); ++ if (!ASSERT_OK(err, "access(" TDIR "/fs1/b)")) ++ goto out; ++ + out: + umount(TDIR "/fs1"); + umount(TDIR "/fs2"); diff --git a/patches.suse/selftests-bpf-Test-libbpf-API-function-btf__add_type.patch b/patches.suse/selftests-bpf-Test-libbpf-API-function-btf__add_type.patch new file mode 100644 index 0000000..dfc70c7 --- /dev/null +++ b/patches.suse/selftests-bpf-Test-libbpf-API-function-btf__add_type.patch @@ -0,0 +1,144 @@ +From: Yonghong Song +Date: Thu, 11 Nov 2021 17:26:25 -0800 +Subject: selftests/bpf: Test libbpf API function btf__add_type_tag() +Patch-mainline: v5.17-rc1 +Git-commit: 0dc85872203bf7b15c56c7eb228b8f3fabb17ac2 +References: jsc#PED-1368 + +Add unit tests for btf__add_type_tag(). + +Signed-off-by: Yonghong Song +Signed-off-by: Alexei Starovoitov +Acked-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211112012625.1505748-1-yhs@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/btf_helpers.c | 4 - + tools/testing/selftests/bpf/prog_tests/btf_write.c | 67 ++++++++++++--------- + 2 files changed, 43 insertions(+), 28 deletions(-) + +--- a/tools/testing/selftests/bpf/btf_helpers.c ++++ b/tools/testing/selftests/bpf/btf_helpers.c +@@ -25,11 +25,12 @@ static const char * const btf_kind_str_m + [BTF_KIND_DATASEC] = "DATASEC", + [BTF_KIND_FLOAT] = "FLOAT", + [BTF_KIND_DECL_TAG] = "DECL_TAG", ++ [BTF_KIND_TYPE_TAG] = "TYPE_TAG", + }; + + static const char *btf_kind_str(__u16 kind) + { +- if (kind > BTF_KIND_DECL_TAG) ++ if (kind > BTF_KIND_TYPE_TAG) + return "UNKNOWN"; + return btf_kind_str_mapping[kind]; + } +@@ -109,6 +110,7 @@ int fprintf_btf_type_raw(FILE *out, cons + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: ++ case BTF_KIND_TYPE_TAG: + fprintf(out, " type_id=%u", t->type); + break; + case BTF_KIND_ARRAY: { +--- a/tools/testing/selftests/bpf/prog_tests/btf_write.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c +@@ -297,6 +297,16 @@ static void gen_btf(struct btf *btf) + ASSERT_EQ(btf_decl_tag(t)->component_idx, 1, "tag_component_idx"); + ASSERT_STREQ(btf_type_raw_dump(btf, 19), + "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", "raw_dump"); ++ ++ /* TYPE_TAG */ ++ id = btf__add_type_tag(btf, "tag1", 1); ++ ASSERT_EQ(id, 20, "tag_id"); ++ t = btf__type_by_id(btf, 20); ++ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag1", "tag_value"); ++ ASSERT_EQ(btf_kind(t), BTF_KIND_TYPE_TAG, "tag_kind"); ++ ASSERT_EQ(t->type, 1, "tag_type"); ++ ASSERT_STREQ(btf_type_raw_dump(btf, 20), ++ "[20] TYPE_TAG 'tag1' type_id=1", "raw_dump"); + } + + static void test_btf_add() +@@ -337,7 +347,8 @@ static void test_btf_add() + "[17] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=1 offset=4 size=8", + "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", +- "[19] DECL_TAG 'tag2' type_id=14 component_idx=1"); ++ "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", ++ "[20] TYPE_TAG 'tag1' type_id=1"); + + btf__free(btf); + } +@@ -359,7 +370,7 @@ static void test_btf_add_btf() + gen_btf(btf2); + + id = btf__add_btf(btf1, btf2); +- if (!ASSERT_EQ(id, 20, "id")) ++ if (!ASSERT_EQ(id, 21, "id")) + goto cleanup; + + VALIDATE_RAW_BTF( +@@ -391,35 +402,37 @@ static void test_btf_add_btf() + "\ttype_id=1 offset=4 size=8", + "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", + "[19] DECL_TAG 'tag2' type_id=14 
component_idx=1", ++ "[20] TYPE_TAG 'tag1' type_id=1", + + /* types appended from the second BTF */ +- "[20] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", +- "[21] PTR '(anon)' type_id=20", +- "[22] CONST '(anon)' type_id=24", +- "[23] VOLATILE '(anon)' type_id=22", +- "[24] RESTRICT '(anon)' type_id=23", +- "[25] ARRAY '(anon)' type_id=21 index_type_id=20 nr_elems=10", +- "[26] STRUCT 's1' size=8 vlen=2\n" +- "\t'f1' type_id=20 bits_offset=0\n" +- "\t'f2' type_id=20 bits_offset=32 bitfield_size=16", +- "[27] UNION 'u1' size=8 vlen=1\n" +- "\t'f1' type_id=20 bits_offset=0 bitfield_size=16", +- "[28] ENUM 'e1' size=4 vlen=2\n" ++ "[21] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", ++ "[22] PTR '(anon)' type_id=21", ++ "[23] CONST '(anon)' type_id=25", ++ "[24] VOLATILE '(anon)' type_id=23", ++ "[25] RESTRICT '(anon)' type_id=24", ++ "[26] ARRAY '(anon)' type_id=22 index_type_id=21 nr_elems=10", ++ "[27] STRUCT 's1' size=8 vlen=2\n" ++ "\t'f1' type_id=21 bits_offset=0\n" ++ "\t'f2' type_id=21 bits_offset=32 bitfield_size=16", ++ "[28] UNION 'u1' size=8 vlen=1\n" ++ "\t'f1' type_id=21 bits_offset=0 bitfield_size=16", ++ "[29] ENUM 'e1' size=4 vlen=2\n" + "\t'v1' val=1\n" + "\t'v2' val=2", +- "[29] FWD 'struct_fwd' fwd_kind=struct", +- "[30] FWD 'union_fwd' fwd_kind=union", +- "[31] ENUM 'enum_fwd' size=4 vlen=0", +- "[32] TYPEDEF 'typedef1' type_id=20", +- "[33] FUNC 'func1' type_id=34 linkage=global", +- "[34] FUNC_PROTO '(anon)' ret_type_id=20 vlen=2\n" +- "\t'p1' type_id=20\n" +- "\t'p2' type_id=21", +- "[35] VAR 'var1' type_id=20, linkage=global-alloc", +- "[36] DATASEC 'datasec1' size=12 vlen=1\n" +- "\ttype_id=20 offset=4 size=8", +- "[37] DECL_TAG 'tag1' type_id=35 component_idx=-1", +- "[38] DECL_TAG 'tag2' type_id=33 component_idx=1"); ++ "[30] FWD 'struct_fwd' fwd_kind=struct", ++ "[31] FWD 'union_fwd' fwd_kind=union", ++ "[32] ENUM 'enum_fwd' size=4 vlen=0", ++ "[33] TYPEDEF 'typedef1' type_id=21", ++ "[34] FUNC 'func1' type_id=35 linkage=global", ++ "[35] FUNC_PROTO '(anon)' ret_type_id=21 vlen=2\n" ++ "\t'p1' type_id=21\n" ++ "\t'p2' type_id=22", ++ "[36] VAR 'var1' type_id=21, linkage=global-alloc", ++ "[37] DATASEC 'datasec1' size=12 vlen=1\n" ++ "\ttype_id=21 offset=4 size=8", ++ "[38] DECL_TAG 'tag1' type_id=36 component_idx=-1", ++ "[39] DECL_TAG 'tag2' type_id=34 component_idx=1", ++ "[40] TYPE_TAG 'tag1' type_id=21"); + + cleanup: + btf__free(btf1); diff --git a/patches.suse/selftests-bpf-Update-btf_dump__new-uses-to-v1.0-vari.patch b/patches.suse/selftests-bpf-Update-btf_dump__new-uses-to-v1.0-vari.patch new file mode 100644 index 0000000..4359ec9 --- /dev/null +++ b/patches.suse/selftests-bpf-Update-btf_dump__new-uses-to-v1.0-vari.patch @@ -0,0 +1,158 @@ +From: Andrii Nakryiko +Date: Wed, 10 Nov 2021 21:36:22 -0800 +Subject: selftests/bpf: Update btf_dump__new() uses to v1.0+ variant +Patch-mainline: v5.17-rc1 +Git-commit: 60ba87bb6bafaaa6e8ef9a73834cf701194d1923 +References: jsc#PED-1368 + +Update to-be-deprecated forms of btf_dump__new(). 
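The mechanical part of the conversion below is the argument order; here is a minimal sketch of the two btf_dump__new() calling conventions, assuming a caller that dumps to a FILE * (dump_printf and open_dump are illustrative names, not part of the patch):

#include <stdarg.h>
#include <stdio.h>
#include <bpf/btf.h>

/* Print callback with the btf_dump_printf_fn_t signature. */
static void dump_printf(void *ctx, const char *fmt, va_list args)
{
	vfprintf((FILE *)ctx, fmt, args);
}

static struct btf_dump *open_dump(const struct btf *btf, FILE *out)
{
	/* The deprecated pre-1.0 form smuggled the callback context
	 * through struct btf_dump_opts:
	 *
	 *   struct btf_dump_opts opts = { .ctx = out };
	 *   d = btf_dump__new(btf, NULL, &opts, dump_printf);
	 *
	 * The v1.0+ form takes the callback and its context directly: */
	return btf_dump__new(btf, dump_printf, out, NULL);
}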
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20211111053624.190580-8-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/btf_helpers.c | 4 -- + tools/testing/selftests/bpf/prog_tests/btf_dump.c | 33 ++++++++------------- + tools/testing/selftests/bpf/prog_tests/btf_split.c | 4 -- + 3 files changed, 15 insertions(+), 26 deletions(-) + +--- a/tools/testing/selftests/bpf/btf_helpers.c ++++ b/tools/testing/selftests/bpf/btf_helpers.c +@@ -238,7 +238,6 @@ const char *btf_type_c_dump(const struct + static char buf[16 * 1024]; + FILE *buf_file; + struct btf_dump *d = NULL; +- struct btf_dump_opts opts = {}; + int err, i; + + buf_file = fmemopen(buf, sizeof(buf) - 1, "w"); +@@ -247,8 +246,7 @@ const char *btf_type_c_dump(const struct + return NULL; + } + +- opts.ctx = buf_file; +- d = btf_dump__new(btf, NULL, &opts, btf_dump_printf); ++ d = btf_dump__new(btf, btf_dump_printf, buf_file, NULL); + if (libbpf_get_error(d)) { + fprintf(stderr, "Failed to create btf_dump instance: %ld\n", libbpf_get_error(d)); + goto err_out; +--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c +@@ -13,25 +13,23 @@ static struct btf_dump_test_case { + const char *name; + const char *file; + bool known_ptr_sz; +- struct btf_dump_opts opts; + } btf_dump_test_cases[] = { +- {"btf_dump: syntax", "btf_dump_test_case_syntax", true, {}}, +- {"btf_dump: ordering", "btf_dump_test_case_ordering", false, {}}, +- {"btf_dump: padding", "btf_dump_test_case_padding", true, {}}, +- {"btf_dump: packing", "btf_dump_test_case_packing", true, {}}, +- {"btf_dump: bitfields", "btf_dump_test_case_bitfields", true, {}}, +- {"btf_dump: multidim", "btf_dump_test_case_multidim", false, {}}, +- {"btf_dump: namespacing", "btf_dump_test_case_namespacing", false, {}}, ++ {"btf_dump: syntax", "btf_dump_test_case_syntax", true}, ++ {"btf_dump: ordering", "btf_dump_test_case_ordering", false}, ++ {"btf_dump: padding", "btf_dump_test_case_padding", true}, ++ {"btf_dump: packing", "btf_dump_test_case_packing", true}, ++ {"btf_dump: bitfields", "btf_dump_test_case_bitfields", true}, ++ {"btf_dump: multidim", "btf_dump_test_case_multidim", false}, ++ {"btf_dump: namespacing", "btf_dump_test_case_namespacing", false}, + }; + +-static int btf_dump_all_types(const struct btf *btf, +- const struct btf_dump_opts *opts) ++static int btf_dump_all_types(const struct btf *btf, void *ctx) + { + size_t type_cnt = btf__type_cnt(btf); + struct btf_dump *d; + int err = 0, id; + +- d = btf_dump__new(btf, NULL, opts, btf_dump_printf); ++ d = btf_dump__new(btf, btf_dump_printf, ctx, NULL); + err = libbpf_get_error(d); + if (err) + return err; +@@ -88,8 +86,7 @@ static int test_btf_dump_case(int n, str + goto done; + } + +- t->opts.ctx = f; +- err = btf_dump_all_types(btf, &t->opts); ++ err = btf_dump_all_types(btf, f); + fclose(f); + close(fd); + if (CHECK(err, "btf_dump", "failure during C dumping: %d\n", err)) { +@@ -137,7 +134,6 @@ static void test_btf_dump_incremental(vo + { + struct btf *btf = NULL; + struct btf_dump *d = NULL; +- struct btf_dump_opts opts; + int id, err, i; + + dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); +@@ -146,8 +142,7 @@ static void test_btf_dump_incremental(vo + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "new_empty")) + goto err_out; +- opts.ctx = dump_buf_file; +- d = btf_dump__new(btf, NULL, &opts, btf_dump_printf); ++ d = btf_dump__new(btf, btf_dump_printf, dump_buf_file, NULL); + 
if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new")) + goto err_out; + +@@ -800,7 +795,6 @@ static void test_btf_datasec(struct btf + static void test_btf_dump_datasec_data(char *str) + { + struct btf *btf; +- struct btf_dump_opts opts = { .ctx = str }; + char license[4] = "GPL"; + struct btf_dump *d; + +@@ -808,7 +802,7 @@ static void test_btf_dump_datasec_data(c + if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found")) + return; + +- d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf); ++ d = btf_dump__new(btf, btf_dump_snprintf, str, NULL); + if (!ASSERT_OK_PTR(d, "could not create BTF dump")) + goto out; + +@@ -822,7 +816,6 @@ out: + + void test_btf_dump() { + char str[STRSIZE]; +- struct btf_dump_opts opts = { .ctx = str }; + struct btf_dump *d; + struct btf *btf; + int i; +@@ -842,7 +835,7 @@ void test_btf_dump() { + if (!ASSERT_OK_PTR(btf, "no kernel BTF found")) + return; + +- d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf); ++ d = btf_dump__new(btf, btf_dump_snprintf, str, NULL); + if (!ASSERT_OK_PTR(d, "could not create BTF dump")) + return; + +--- a/tools/testing/selftests/bpf/prog_tests/btf_split.c ++++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c +@@ -13,7 +13,6 @@ static void btf_dump_printf(void *ctx, c + } + + void test_btf_split() { +- struct btf_dump_opts opts; + struct btf_dump *d = NULL; + const struct btf_type *t; + struct btf *btf1, *btf2; +@@ -68,8 +67,7 @@ void test_btf_split() { + dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); + if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream")) + return; +- opts.ctx = dump_buf_file; +- d = btf_dump__new(btf2, NULL, &opts, btf_dump_printf); ++ d = btf_dump__new(btf2, btf_dump_printf, dump_buf_file, NULL); + if (!ASSERT_OK_PTR(d, "btf_dump__new")) + goto cleanup; + for (i = 1; i < btf__type_cnt(btf2); i++) { diff --git a/patches.suse/selftests-bpf-Update-test-names-for-xchg-and-cmpxchg.patch b/patches.suse/selftests-bpf-Update-test-names-for-xchg-and-cmpxchg.patch new file mode 100644 index 0000000..5581a2d --- /dev/null +++ b/patches.suse/selftests-bpf-Update-test-names-for-xchg-and-cmpxchg.patch @@ -0,0 +1,39 @@ +From: "Paul E. McKenney" +Date: Tue, 30 Nov 2021 16:50:30 -0800 +Subject: selftests/bpf: Update test names for xchg and cmpxchg +Patch-mainline: v5.17-rc1 +Git-commit: 8b4ff5f8bb126fa8ee6918f4854748277609cf68 +References: jsc#PED-1368 + +The test_cmpxchg() and test_xchg() functions say "test_run add". +Therefore, make them say "test_run cmpxchg" and "test_run xchg", +respectively. + +Signed-off-by: Paul E. 
McKenney +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211201005030.GA3071525@paulmck-ThinkPad-P17-Gen-1 +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/prog_tests/atomics.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/tools/testing/selftests/bpf/prog_tests/atomics.c ++++ b/tools/testing/selftests/bpf/prog_tests/atomics.c +@@ -167,7 +167,7 @@ static void test_cmpxchg(struct atomics_ + prog_fd = skel->progs.cmpxchg.prog_fd; + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); +- if (CHECK(err || retval, "test_run add", ++ if (CHECK(err || retval, "test_run cmpxchg", + "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + goto cleanup; + +@@ -196,7 +196,7 @@ static void test_xchg(struct atomics_lsk + prog_fd = skel->progs.xchg.prog_fd; + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); +- if (CHECK(err || retval, "test_run add", ++ if (CHECK(err || retval, "test_run xchg", + "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + goto cleanup; + diff --git a/patches.suse/selftests-bpf-Use-explicit-bpf_prog_test_load-calls-.patch b/patches.suse/selftests-bpf-Use-explicit-bpf_prog_test_load-calls-.patch new file mode 100644 index 0000000..5f96242 --- /dev/null +++ b/patches.suse/selftests-bpf-Use-explicit-bpf_prog_test_load-calls-.patch @@ -0,0 +1,695 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 15:08:44 -0700 +Subject: selftests/bpf: Use explicit bpf_prog_test_load() calls everywhere +Patch-mainline: v5.17-rc1 +Git-commit: cbdb1461dcf45765a036e9f6975ffe19e69bdc33 +References: jsc#PED-1368 + +-Dbpf_prog_load_deprecated=bpf_prog_test_load trick is both ugly and +breaks when deprecation goes into effect due to macro magic. Convert all +the uses to explicit bpf_prog_test_load() calls which avoid deprecation +errors and make everything less magical.
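For reference, this is the call pattern the conversion below lands on; a minimal sketch assuming the selftests' local testing_helpers.h is on the include path (load_pkt_access is an illustrative wrapper name):

#include <bpf/libbpf.h>
#include "testing_helpers.h"

static int load_pkt_access(struct bpf_object **obj)
{
	int prog_fd;

	/* Explicit helper call instead of the old macro-redirected
	 * bpf_prog_load(); per the fexit_bpf2bpf.c comment further down,
	 * the helper also loads the object with BPF_F_TEST_RND_HI32. */
	if (bpf_prog_test_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
			       obj, &prog_fd))
		return -1;

	return prog_fd;
}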
+ +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Dave Marchevsky +Link: https://lore.kernel.org/bpf/20211103220845.2676888-12-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/Makefile | 2 - + tools/testing/selftests/bpf/flow_dissector_load.h | 3 + + tools/testing/selftests/bpf/get_cgroup_id_user.c | 5 +- + tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c | 2 - + tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c | 8 ++-- + tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c | 4 +- + tools/testing/selftests/bpf/prog_tests/global_data.c | 2 - + tools/testing/selftests/bpf/prog_tests/global_func_args.c | 2 - + tools/testing/selftests/bpf/prog_tests/kfree_skb.c | 2 - + tools/testing/selftests/bpf/prog_tests/l4lb_all.c | 2 - + tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c | 2 - + tools/testing/selftests/bpf/prog_tests/map_lock.c | 4 +- + tools/testing/selftests/bpf/prog_tests/pkt_access.c | 2 - + tools/testing/selftests/bpf/prog_tests/pkt_md_access.c | 2 - + tools/testing/selftests/bpf/prog_tests/queue_stack_map.c | 2 - + tools/testing/selftests/bpf/prog_tests/skb_ctx.c | 2 - + tools/testing/selftests/bpf/prog_tests/skb_helpers.c | 2 - + tools/testing/selftests/bpf/prog_tests/spinlock.c | 4 +- + tools/testing/selftests/bpf/prog_tests/stacktrace_map.c | 2 - + tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c | 2 - + tools/testing/selftests/bpf/prog_tests/tailcalls.c | 18 +++++----- + tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c | 2 - + tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c | 4 +- + tools/testing/selftests/bpf/prog_tests/tcp_estats.c | 2 - + tools/testing/selftests/bpf/prog_tests/tp_attach_query.c | 2 - + tools/testing/selftests/bpf/prog_tests/xdp.c | 2 - + tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c | 6 +-- + tools/testing/selftests/bpf/prog_tests/xdp_attach.c | 6 +-- + tools/testing/selftests/bpf/prog_tests/xdp_info.c | 2 - + tools/testing/selftests/bpf/prog_tests/xdp_perf.c | 2 - + tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c | 2 - + tools/testing/selftests/bpf/test_dev_cgroup.c | 3 + + tools/testing/selftests/bpf/test_lirc_mode2_user.c | 6 ++- + tools/testing/selftests/bpf/test_maps.c | 7 ++- + tools/testing/selftests/bpf/test_sysctl.c | 1 + tools/testing/selftests/bpf/test_tcpnotify_user.c | 3 + + tools/testing/selftests/bpf/xdping.c | 3 + + 37 files changed, 68 insertions(+), 59 deletions(-) + +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -24,7 +24,6 @@ SAN_CFLAGS ?= + CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \ + -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ + -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) \ +- -Dbpf_prog_load_deprecated=bpf_prog_test_load \ + -Dbpf_load_program=bpf_test_load_program + LDLIBS += -lcap -lelf -lz -lrt -lpthread + +@@ -207,6 +206,7 @@ $(OUTPUT)/test_lirc_mode2_user: testing_ + $(OUTPUT)/xdping: testing_helpers.o + $(OUTPUT)/flow_dissector_load: testing_helpers.o + $(OUTPUT)/test_maps: testing_helpers.o ++$(OUTPUT)/test_verifier: testing_helpers.o + + BPFTOOL ?= $(DEFAULT_BPFTOOL) + $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ +--- a/tools/testing/selftests/bpf/flow_dissector_load.h ++++ b/tools/testing/selftests/bpf/flow_dissector_load.h +@@ -4,6 +4,7 @@ + + #include + #include ++#include "testing_helpers.h" + + static inline int bpf_flow_load(struct bpf_object **obj, + const char *path, +@@ -18,7 +19,7 @@ 
static inline int bpf_flow_load(struct b + int prog_array_fd; + int ret, fd, i; + +- ret = bpf_prog_load(path, BPF_PROG_TYPE_FLOW_DISSECTOR, obj, ++ ret = bpf_prog_test_load(path, BPF_PROG_TYPE_FLOW_DISSECTOR, obj, + prog_fd); + if (ret) + return ret; +--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c ++++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c +@@ -19,6 +19,7 @@ + #include + + #include "cgroup_helpers.h" ++#include "testing_helpers.h" + #include "bpf_rlimit.h" + + #define CHECK(condition, tag, format...) ({ \ +@@ -66,8 +67,8 @@ int main(int argc, char **argv) + if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno)) + return 1; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); +- if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno)) ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); ++ if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno)) + goto cleanup_cgroup_env; + + cgidmap_fd = bpf_find_map(__func__, obj, "cg_ids"); +--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c ++++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +@@ -48,7 +48,7 @@ void serial_test_bpf_obj_id(void) + bzero(zeros, sizeof(zeros)); + for (i = 0; i < nr_iters; i++) { + now = time(NULL); +- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, + &objs[i], &prog_fds[i]); + /* test_obj_id.o is a dumb prog. It should never fail + * to load. +--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c ++++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +@@ -65,7 +65,7 @@ static void test_fexit_bpf2bpf_common(co + int err, tgt_fd, i; + struct btf *btf; + +- err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, ++ err = bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, + &tgt_obj, &tgt_fd); + if (!ASSERT_OK(err, "tgt_prog_load")) + return; +@@ -224,7 +224,7 @@ static int test_second_attach(struct bpf + if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name)) + return -ENOENT; + +- err = bpf_prog_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC, ++ err = bpf_prog_test_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC, + &tgt_obj, &tgt_fd); + if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n", + tgt_obj_file, err, errno)) +@@ -274,7 +274,7 @@ static void test_fmod_ret_freplace(void) + __u32 duration = 0; + int err, pkt_fd, attach_prog_fd; + +- err = bpf_prog_load(tgt_name, BPF_PROG_TYPE_UNSPEC, ++ err = bpf_prog_test_load(tgt_name, BPF_PROG_TYPE_UNSPEC, + &pkt_obj, &pkt_fd); + /* the target prog should load fine */ + if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", +@@ -341,7 +341,7 @@ static void test_obj_load_failure_common + int err, pkt_fd; + __u32 duration = 0; + +- err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, ++ err = bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, + &pkt_obj, &pkt_fd); + /* the target prog should load fine */ + if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", +--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c ++++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c +@@ -94,11 +94,11 @@ void test_get_stack_raw_tp(void) + struct bpf_map *map; + cpu_set_t cpu_set; + +- err = bpf_prog_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); ++ err = bpf_prog_test_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err >= 0, "prog_load raw tp", "err %d errno %d\n", err, errno)) + 
return; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) + return; + +--- a/tools/testing/selftests/bpf/prog_tests/global_data.c ++++ b/tools/testing/selftests/bpf/prog_tests/global_data.c +@@ -136,7 +136,7 @@ void test_global_data(void) + struct bpf_object *obj; + int err, prog_fd; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (CHECK(err, "load program", "error %d loading %s\n", err, file)) + return; + +--- a/tools/testing/selftests/bpf/prog_tests/global_func_args.c ++++ b/tools/testing/selftests/bpf/prog_tests/global_func_args.c +@@ -44,7 +44,7 @@ void test_global_func_args(void) + struct bpf_object *obj; + int err, prog_fd; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); + if (CHECK(err, "load program", "error %d loading %s\n", err, file)) + return; + +--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c ++++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +@@ -74,7 +74,7 @@ void serial_test_kfree_skb(void) + const int zero = 0; + bool test_ok[2]; + +- err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, ++ err = bpf_prog_test_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &tattr.prog_fd); + if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) + return; +--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c ++++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c +@@ -30,7 +30,7 @@ static void test_l4lb(const char *file) + char buf[128]; + u32 *magic = (u32 *)buf; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + +--- a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c ++++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c +@@ -27,7 +27,7 @@ void test_load_bytes_relative(void) + if (CHECK_FAIL(server_fd < 0)) + goto close_cgroup_fd; + +- err = bpf_prog_load("./load_bytes_relative.o", BPF_PROG_TYPE_CGROUP_SKB, ++ err = bpf_prog_test_load("./load_bytes_relative.o", BPF_PROG_TYPE_CGROUP_SKB, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + goto close_server_fd; +--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c ++++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c +@@ -53,9 +53,9 @@ void test_map_lock(void) + int err = 0, key = 0, i; + void *ret; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); + if (CHECK_FAIL(err)) { +- printf("test_map_lock:bpf_prog_load errno %d\n", errno); ++ printf("test_map_lock:bpf_prog_test_load errno %d\n", errno); + goto close_prog; + } + map_fd[0] = bpf_find_map(__func__, obj, "hash_map"); +--- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c ++++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c +@@ -9,7 +9,7 @@ void test_pkt_access(void) + __u32 duration, retval; + int err, prog_fd; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + +--- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c ++++ 
b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c +@@ -9,7 +9,7 @@ void test_pkt_md_access(void) + __u32 duration, retval; + int err, prog_fd; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + +--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c ++++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c +@@ -27,7 +27,7 @@ static void test_queue_stack_map_by_type + else + return; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + +--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c ++++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +@@ -32,7 +32,7 @@ void test_skb_ctx(void) + int err; + int i; + +- err = bpf_prog_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj, ++ err = bpf_prog_test_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + &tattr.prog_fd); + if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + return; +--- a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c ++++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c +@@ -20,7 +20,7 @@ void test_skb_helpers(void) + struct bpf_object *obj; + int err; + +- err = bpf_prog_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj, ++ err = bpf_prog_test_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + &tattr.prog_fd); + if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + return; +--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c ++++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c +@@ -24,9 +24,9 @@ void test_spinlock(void) + int err = 0, i; + void *ret; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); + if (CHECK_FAIL(err)) { +- printf("test_spin_lock:bpf_prog_load errno %d\n", errno); ++ printf("test_spin_lock:bpf_prog_test_load errno %d\n", errno); + goto close_prog; + } + for (i = 0; i < 4; i++) +--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c ++++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c +@@ -12,7 +12,7 @@ void test_stacktrace_map(void) + struct bpf_object *obj; + struct bpf_link *link; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + return; + +--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c ++++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c +@@ -12,7 +12,7 @@ void test_stacktrace_map_raw_tp(void) + struct bpf_object *obj; + struct bpf_link *link = NULL; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) + return; + +--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c ++++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c +@@ -16,7 +16,7 @@ static void test_tailcall_1(void) + char prog_name[32]; + char buff[128] = {}; + +- err = bpf_prog_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj, ++ err = bpf_prog_test_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + &prog_fd); + if (CHECK_FAIL(err)) + return; +@@ -154,7 +154,7 @@ static void 
test_tailcall_2(void) + char prog_name[32]; + char buff[128] = {}; + +- err = bpf_prog_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj, ++ err = bpf_prog_test_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + &prog_fd); + if (CHECK_FAIL(err)) + return; +@@ -228,7 +228,7 @@ static void test_tailcall_count(const ch + __u32 retval, duration; + char buff[128] = {}; + +- err = bpf_prog_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj, ++ err = bpf_prog_test_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj, + &prog_fd); + if (CHECK_FAIL(err)) + return; +@@ -324,7 +324,7 @@ static void test_tailcall_4(void) + char buff[128] = {}; + char prog_name[32]; + +- err = bpf_prog_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj, ++ err = bpf_prog_test_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + &prog_fd); + if (CHECK_FAIL(err)) + return; +@@ -412,7 +412,7 @@ static void test_tailcall_5(void) + char buff[128] = {}; + char prog_name[32]; + +- err = bpf_prog_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj, ++ err = bpf_prog_test_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + &prog_fd); + if (CHECK_FAIL(err)) + return; +@@ -498,7 +498,7 @@ static void test_tailcall_bpf2bpf_1(void + __u32 retval, duration; + char prog_name[32]; + +- err = bpf_prog_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS, ++ err = bpf_prog_test_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; +@@ -582,7 +582,7 @@ static void test_tailcall_bpf2bpf_2(void + __u32 retval, duration; + char buff[128] = {}; + +- err = bpf_prog_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS, ++ err = bpf_prog_test_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; +@@ -660,7 +660,7 @@ static void test_tailcall_bpf2bpf_3(void + __u32 retval, duration; + char prog_name[32]; + +- err = bpf_prog_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS, ++ err = bpf_prog_test_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; +@@ -757,7 +757,7 @@ static void test_tailcall_bpf2bpf_4(bool + __u32 retval, duration; + char prog_name[32]; + +- err = bpf_prog_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS, ++ err = bpf_prog_test_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; +--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c ++++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c +@@ -11,7 +11,7 @@ void test_task_fd_query_rawtp(void) + __u32 duration = 0; + char buf[256]; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) + return; + +--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c ++++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c +@@ -13,8 +13,8 @@ static void test_task_fd_query_tp_core(c + __u32 duration = 0; + char buf[256]; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); +- if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno)) ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); ++ if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno)) + goto close_prog; + + snprintf(buf, sizeof(buf), +--- a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c ++++ b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c +@@ -8,7 +8,7 @@ 
void test_tcp_estats(void) + struct bpf_object *obj; + __u32 duration = 0; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + CHECK(err, "", "err %d errno %d\n", err, errno); + if (err) + return; +--- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c ++++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c +@@ -35,7 +35,7 @@ void serial_test_tp_attach_query(void) + + query = malloc(sizeof(*query) + sizeof(__u32) * num_progs); + for (i = 0; i < num_progs; i++) { +- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i], ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i], + &prog_fd[i]); + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + goto cleanup1; +--- a/tools/testing/selftests/bpf/prog_tests/xdp.c ++++ b/tools/testing/selftests/bpf/prog_tests/xdp.c +@@ -16,7 +16,7 @@ void test_xdp(void) + __u32 duration, retval, size; + int err, prog_fd, map_fd; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + +--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c ++++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +@@ -10,7 +10,7 @@ static void test_xdp_adjust_tail_shrink( + int err, prog_fd; + char buf[128]; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + +@@ -38,7 +38,7 @@ static void test_xdp_adjust_tail_grow(vo + __u32 duration, retval, size, expect_sz; + int err, prog_fd; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + +@@ -75,7 +75,7 @@ static void test_xdp_adjust_tail_grow2(v + .data_size_out = 0, /* Per test */ + }; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd); + if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + return; + +--- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c ++++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c +@@ -16,7 +16,7 @@ void serial_test_xdp_attach(void) + + len = sizeof(info); + +- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1); + if (CHECK_FAIL(err)) + return; + err = bpf_obj_get_info_by_fd(fd1, &info, &len); +@@ -24,7 +24,7 @@ void serial_test_xdp_attach(void) + goto out_1; + id1 = info.id; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2); + if (CHECK_FAIL(err)) + goto out_1; + +@@ -34,7 +34,7 @@ void serial_test_xdp_attach(void) + goto out_2; + id2 = info.id; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3); + if (CHECK_FAIL(err)) + goto out_2; + +--- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c ++++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c +@@ -29,7 +29,7 @@ void serial_test_xdp_info(void) + + /* Setup prog */ + +- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + +--- a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c ++++ 
b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c +@@ -9,7 +9,7 @@ void test_xdp_perf(void) + char in[128], out[128]; + int err, prog_fd; + +- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); ++ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + +--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c ++++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c +@@ -73,7 +73,7 @@ int test_subprog2(struct args_subprog2 * + __builtin_preserve_access_index(&skb->len)); + + ret = ctx->ret; +- /* bpf_prog_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32 ++ /* bpf_prog_test_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32 + * which randomizes upper 32 bits after BPF_ALU32 insns. + * Hence after 'w0 <<= 1' upper bits of $rax are random. + * That is expected and correct. Trim them. +--- a/tools/testing/selftests/bpf/test_dev_cgroup.c ++++ b/tools/testing/selftests/bpf/test_dev_cgroup.c +@@ -14,6 +14,7 @@ + #include + + #include "cgroup_helpers.h" ++#include "testing_helpers.h" + #include "bpf_rlimit.h" + + #define DEV_CGROUP_PROG "./dev_cgroup.o" +@@ -27,7 +28,7 @@ int main(int argc, char **argv) + int prog_fd, cgroup_fd; + __u32 prog_cnt; + +- if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE, ++ if (bpf_prog_test_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE, + &obj, &prog_fd)) { + printf("Failed to load DEV_CGROUP program\n"); + goto out; +--- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c ++++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c +@@ -45,6 +45,8 @@ + #include + #include + ++#include "testing_helpers.h" ++ + int main(int argc, char **argv) + { + struct bpf_object *obj; +@@ -58,8 +60,8 @@ int main(int argc, char **argv) + return 2; + } + +- ret = bpf_prog_load("test_lirc_mode2_kern.o", +- BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd); ++ ret = bpf_prog_test_load("test_lirc_mode2_kern.o", ++ BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd); + if (ret) { + printf("Failed to load bpf program\n"); + return 1; +--- a/tools/testing/selftests/bpf/test_maps.c ++++ b/tools/testing/selftests/bpf/test_maps.c +@@ -25,6 +25,7 @@ + #include "bpf_util.h" + #include "bpf_rlimit.h" + #include "test_maps.h" ++#include "testing_helpers.h" + + #ifndef ENOTSUPP + #define ENOTSUPP 524 +@@ -830,21 +831,21 @@ static void test_sockmap(unsigned int ta + } + + /* Load SK_SKB program and Attach */ +- err = bpf_prog_load(SOCKMAP_PARSE_PROG, ++ err = bpf_prog_test_load(SOCKMAP_PARSE_PROG, + BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog); + if (err) { + printf("Failed to load SK_SKB parse prog\n"); + goto out_sockmap; + } + +- err = bpf_prog_load(SOCKMAP_TCP_MSG_PROG, ++ err = bpf_prog_test_load(SOCKMAP_TCP_MSG_PROG, + BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog); + if (err) { + printf("Failed to load SK_SKB msg prog\n"); + goto out_sockmap; + } + +- err = bpf_prog_load(SOCKMAP_VERDICT_PROG, ++ err = bpf_prog_test_load(SOCKMAP_VERDICT_PROG, + BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog); + if (err) { + printf("Failed to load SK_SKB verdict prog\n"); +--- a/tools/testing/selftests/bpf/test_sysctl.c ++++ b/tools/testing/selftests/bpf/test_sysctl.c +@@ -17,6 +17,7 @@ + #include "bpf_rlimit.h" + #include "bpf_util.h" + #include "cgroup_helpers.h" ++#include "testing_helpers.h" + + #define CG_PATH "/foo" + #define MAX_INSNS 512 +--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c ++++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c +@@ -25,6 +25,7 @@ + + #include "test_tcpnotify.h" + #include "trace_helpers.h" ++#include 
"testing_helpers.h" + + #define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L) + +@@ -92,7 +93,7 @@ int main(int argc, char **argv) + if (cg_fd < 0) + goto err; + +- if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { ++ if (bpf_prog_test_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { + printf("FAILED: load_bpf_file failed for: %s\n", file); + goto err; + } +--- a/tools/testing/selftests/bpf/xdping.c ++++ b/tools/testing/selftests/bpf/xdping.c +@@ -22,6 +22,7 @@ + #include "bpf/libbpf.h" + + #include "xdping.h" ++#include "testing_helpers.h" + + static int ifindex; + static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +@@ -173,7 +174,7 @@ int main(int argc, char **argv) + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + +- if (bpf_prog_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) { ++ if (bpf_prog_test_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) { + fprintf(stderr, "load of %s failed\n", filename); + return 1; + } diff --git a/patches.suse/selftests-bpf-Use-explicit-bpf_test_load_program-hel.patch b/patches.suse/selftests-bpf-Use-explicit-bpf_test_load_program-hel.patch new file mode 100644 index 0000000..d6496d8 --- /dev/null +++ b/patches.suse/selftests-bpf-Use-explicit-bpf_test_load_program-hel.patch @@ -0,0 +1,149 @@ +From: Andrii Nakryiko +Date: Wed, 3 Nov 2021 15:08:45 -0700 +Subject: selftests/bpf: Use explicit bpf_test_load_program() helper calls +Patch-mainline: v5.17-rc1 +Git-commit: f19ddfe0360a1aa64db0b4a41f59e1ade3f6d288 +References: jsc#PED-1368 + +Remove the second part of prog loading testing helper re-definition: + + -Dbpf_load_program=bpf_test_load_program + +This completes the clean up of deprecated libbpf program loading APIs. + +Signed-off-by: Andrii Nakryiko +Signed-off-by: Alexei Starovoitov +Acked-by: Dave Marchevsky +Link: https://lore.kernel.org/bpf/20211103220845.2676888-13-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/Makefile | 3 +-- + tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c | 2 +- + tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c | 2 +- + tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c | 2 +- + tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c | 2 +- + tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c | 4 ++-- + tools/testing/selftests/bpf/prog_tests/signal_pending.c | 2 +- + tools/testing/selftests/bpf/test_cgroup_storage.c | 3 ++- + tools/testing/selftests/bpf/test_tag.c | 3 ++- + 9 files changed, 12 insertions(+), 11 deletions(-) + +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -23,8 +23,7 @@ BPF_GCC ?= $(shell command -v bpf-gcc;) + SAN_CFLAGS ?= + CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \ + -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ +- -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) \ +- -Dbpf_load_program=bpf_test_load_program ++ -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) + LDLIBS += -lcap -lelf -lz -lrt -lpthread + + # Silence some warnings when compiled with clang +--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c ++++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c +@@ -16,7 +16,7 @@ static int prog_load(void) + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + +- return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, ++ return bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); + } +--- 
a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c ++++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +@@ -66,7 +66,7 @@ static int prog_load_cnt(int verdict, in + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + int ret; + +- ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, ++ ret = bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); + +--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c ++++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c +@@ -18,7 +18,7 @@ static int prog_load(int verdict) + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + +- return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, ++ return bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); + } +--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c ++++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c +@@ -30,7 +30,7 @@ void serial_test_flow_dissector_load_byt + + /* make sure bpf_skb_load_bytes is not allowed from skb-less context + */ +- fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog, ++ fd = bpf_test_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog, + ARRAY_SIZE(prog), "GPL", 0, NULL, 0); + CHECK(fd < 0, + "flow_dissector-bpf_skb_load_bytes-load", +--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c ++++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c +@@ -47,9 +47,9 @@ static int load_prog(enum bpf_prog_type + }; + int fd; + +- fd = bpf_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); ++ fd = bpf_test_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); + if (CHECK_FAIL(fd < 0)) +- perror("bpf_load_program"); ++ perror("bpf_test_load_program"); + + return fd; + } +--- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c ++++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c +@@ -22,7 +22,7 @@ static void test_signal_pending_by_type( + prog[i] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0); + prog[ARRAY_SIZE(prog) - 1] = BPF_EXIT_INSN(); + +- prog_fd = bpf_load_program(prog_type, prog, ARRAY_SIZE(prog), ++ prog_fd = bpf_test_load_program(prog_type, prog, ARRAY_SIZE(prog), + "GPL", 0, NULL, 0); + CHECK(prog_fd < 0, "test-run", "errno %d\n", errno); + +--- a/tools/testing/selftests/bpf/test_cgroup_storage.c ++++ b/tools/testing/selftests/bpf/test_cgroup_storage.c +@@ -8,6 +8,7 @@ + + #include "bpf_rlimit.h" + #include "cgroup_helpers.h" ++#include "testing_helpers.h" + + char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +@@ -66,7 +67,7 @@ int main(int argc, char **argv) + + prog[0].imm = percpu_map_fd; + prog[7].imm = map_fd; +- prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, ++ prog_fd = bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); + if (prog_fd < 0) { +--- a/tools/testing/selftests/bpf/test_tag.c ++++ b/tools/testing/selftests/bpf/test_tag.c +@@ -21,6 +21,7 @@ + + #include "../../../include/linux/filter.h" + #include "bpf_rlimit.h" ++#include "testing_helpers.h" + + static struct bpf_insn prog[BPF_MAXINSNS]; + +@@ -57,7 +58,7 @@ static int bpf_try_load_prog(int insns, + int fd_prog; + + bpf_filler(insns, fd_map); +- fd_prog = bpf_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0, ++ fd_prog = bpf_test_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0, + NULL, 0); + assert(fd_prog > 0); + if (fd_map > 0) diff --git 
a/patches.suse/selftests-bpf-Variable-naming-fix.patch b/patches.suse/selftests-bpf-Variable-naming-fix.patch new file mode 100644 index 0000000..62db533 --- /dev/null +++ b/patches.suse/selftests-bpf-Variable-naming-fix.patch @@ -0,0 +1,66 @@ +From: Yucong Sun +Date: Fri, 12 Nov 2021 11:25:33 -0800 +Subject: selftests/bpf: Variable naming fix +Patch-mainline: v5.17-rc1 +Git-commit: 67d61d30b8a8f33d2a4f269f3a548409827d7b01 +References: jsc#PED-1368 + +Change log_fd to log_fp to reflect its type correctly. + +Signed-off-by: Yucong Sun +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211112192535.898352-3-fallentree@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/testing/selftests/bpf/test_progs.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +--- a/tools/testing/selftests/bpf/test_progs.c ++++ b/tools/testing/selftests/bpf/test_progs.c +@@ -939,7 +939,7 @@ static void *dispatch_thread(void *ctx) + { + struct dispatch_data *data = ctx; + int sock_fd; +- FILE *log_fd = NULL; ++ FILE *log_fp = NULL; + + sock_fd = data->sock_fd; + +@@ -1002,8 +1002,8 @@ static void *dispatch_thread(void *ctx) + + /* collect all logs */ + if (msg_test_done.test_done.have_log) { +- log_fd = open_memstream(&result->log_buf, &result->log_cnt); +- if (!log_fd) ++ log_fp = open_memstream(&result->log_buf, &result->log_cnt); ++ if (!log_fp) + goto error; + + while (true) { +@@ -1014,12 +1014,12 @@ static void *dispatch_thread(void *ctx) + if (msg_log.type != MSG_TEST_LOG) + goto error; + +- fprintf(log_fd, "%s", msg_log.test_log.log_buf); ++ fprintf(log_fp, "%s", msg_log.test_log.log_buf); + if (msg_log.test_log.is_last) + break; + } +- fclose(log_fd); +- log_fd = NULL; ++ fclose(log_fp); ++ log_fp = NULL; + } + /* output log */ + { +@@ -1045,8 +1045,8 @@ error: + if (env.debug) + fprintf(stderr, "[%d]: Protocol/IO error: %s.\n", data->worker_id, strerror(errno)); + +- if (log_fd) +- fclose(log_fd); ++ if (log_fp) ++ fclose(log_fp); + done: + { + struct msg msg_exit; diff --git a/patches.suse/selftests-kvm-Check-whether-SIDA-memop-fails-for-normal-guests b/patches.suse/selftests-kvm-Check-whether-SIDA-memop-fails-for-normal-guests new file mode 100644 index 0000000..3bf62b0 --- /dev/null +++ b/patches.suse/selftests-kvm-Check-whether-SIDA-memop-fails-for-normal-guests @@ -0,0 +1,48 @@ +From: Thomas Huth +Date: Tue, 15 Feb 2022 08:48:24 +0100 +Subject: selftests: kvm: Check whether SIDA memop fails for normal guests +Git-commit: 05515d341fe5c3674ab944fe50c4bde8f9727723 +Patch-mainline: v5.18-rc1 +References: jsc#PED-579 + +Commit 2c212e1baedc ("KVM: s390: Return error on SIDA memop on normal +guest") fixed the behavior of the SIDA memops for normal guests. It +would be nice to have a way to test whether the current kernel has +the fix applied or not. Thus add a check to the KVM selftests for +these two memops. 
+ +Signed-off-by: Thomas Huth +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Claudio Imbrenda +Reviewed-by: Shuah Khan +Link: https://lore.kernel.org/r/20220215074824.188440-1-thuth@redhat.com +Signed-off-by: Christian Borntraeger +Acked-by: Petr Tesarik +--- + tools/testing/selftests/kvm/s390x/memop.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +--- a/tools/testing/selftests/kvm/s390x/memop.c ++++ b/tools/testing/selftests/kvm/s390x/memop.c +@@ -160,6 +160,21 @@ int main(int argc, char *argv[]) + run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */ + vcpu_run(vm, VCPU_ID); /* Run to sync new state */ + ++ /* Check that the SIDA calls are rejected for non-protected guests */ ++ ksmo.gaddr = 0; ++ ksmo.flags = 0; ++ ksmo.size = 8; ++ ksmo.op = KVM_S390_MEMOP_SIDA_READ; ++ ksmo.buf = (uintptr_t)mem1; ++ ksmo.sida_offset = 0x1c0; ++ rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ TEST_ASSERT(rv == -1 && errno == EINVAL, ++ "ioctl does not reject SIDA_READ in non-protected mode"); ++ ksmo.op = KVM_S390_MEMOP_SIDA_WRITE; ++ rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); ++ TEST_ASSERT(rv == -1 && errno == EINVAL, ++ "ioctl does not reject SIDA_WRITE in non-protected mode"); ++ + kvm_vm_free(vm); + + return 0; diff --git a/patches.suse/skbuff-Move-conditional-preprocessor-directives-out-.patch b/patches.suse/skbuff-Move-conditional-preprocessor-directives-out-.patch new file mode 100644 index 0000000..97e3f8b --- /dev/null +++ b/patches.suse/skbuff-Move-conditional-preprocessor-directives-out-.patch @@ -0,0 +1,153 @@ +From: Kees Cook +Date: Sat, 20 Nov 2021 16:31:48 -0800 +Subject: skbuff: Move conditional preprocessor directives out of struct + sk_buff +Patch-mainline: v5.17-rc1 +Git-commit: fba84957e2e2e201cf4e352efe0c7cac0fbb5d5d +References: jsc#PED-1368 + +In preparation for using the struct_group() macro in struct sk_buff, +move the conditional preprocessor directives out of the region of struct +sk_buff that will be enclosed by struct_group(). While GCC and Clang are +happy with conditional preprocessor directives here, sparse is not, even +under -Wno-directive-within-macro[1], as would be seen under a C=1 build: + +net/core/filter.c: note: in included file (through include/linux/netlink.h, include/linux/sock_diag.h): +./include/linux/skbuff.h:820:1: warning: directive in macro's argument list +./include/linux/skbuff.h:822:1: warning: directive in macro's argument list +./include/linux/skbuff.h:846:1: warning: directive in macro's argument list +./include/linux/skbuff.h:848:1: warning: directive in macro's argument list + +Additionally remove empty macro argument definitions and usage. + +"objdump -d" shows no object code differences. + +[1] https://www.spinics.net/lists/linux-sparse/msg10857.html + +Signed-off-by: Kees Cook +Signed-off-by: David S. 
Miller +Acked-by: Shung-Hsi Yu +--- + include/linux/skbuff.h | 36 +++++++++++++++++++----------------- + net/core/filter.c | 10 +++++----- + 2 files changed, 24 insertions(+), 22 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -789,7 +789,7 @@ struct sk_buff { + #else + #define CLONED_MASK 1 + #endif +-#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) ++#define CLONED_OFFSET offsetof(struct sk_buff, __cloned_offset) + + /* private: */ + __u8 __cloned_offset[0]; +@@ -812,18 +812,10 @@ struct sk_buff { + __u32 headers_start[0]; + /* public: */ + +-/* if you move pkt_type around you also must adapt those constants */ +-#ifdef __BIG_ENDIAN_BITFIELD +-#define PKT_TYPE_MAX (7 << 5) +-#else +-#define PKT_TYPE_MAX 7 +-#endif +-#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) +- + /* private: */ + __u8 __pkt_type_offset[0]; + /* public: */ +- __u8 pkt_type:3; ++ __u8 pkt_type:3; /* see PKT_TYPE_MAX */ + __u8 ignore_df:1; + __u8 nf_trace:1; + __u8 ip_summed:2; +@@ -839,16 +831,10 @@ struct sk_buff { + __u8 encap_hdr_csum:1; + __u8 csum_valid:1; + +-#ifdef __BIG_ENDIAN_BITFIELD +-#define PKT_VLAN_PRESENT_BIT 7 +-#else +-#define PKT_VLAN_PRESENT_BIT 0 +-#endif +-#define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, __pkt_vlan_present_offset) + /* private: */ + __u8 __pkt_vlan_present_offset[0]; + /* public: */ +- __u8 vlan_present:1; ++ __u8 vlan_present:1; /* See PKT_VLAN_PRESENT_BIT */ + __u8 csum_complete_sw:1; + __u8 csum_level:2; + __u8 csum_not_inet:1; +@@ -944,6 +930,22 @@ struct sk_buff { + #endif + }; + ++/* if you move pkt_type around you also must adapt those constants */ ++#ifdef __BIG_ENDIAN_BITFIELD ++#define PKT_TYPE_MAX (7 << 5) ++#else ++#define PKT_TYPE_MAX 7 ++#endif ++#define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) ++ ++/* if you move pkt_vlan_present around you also must adapt these constants */ ++#ifdef __BIG_ENDIAN_BITFIELD ++#define PKT_VLAN_PRESENT_BIT 7 ++#else ++#define PKT_VLAN_PRESENT_BIT 0 ++#endif ++#define PKT_VLAN_PRESENT_OFFSET offsetof(struct sk_buff, __pkt_vlan_present_offset) ++ + #ifdef __KERNEL__ + /* + * Handling routines are only of interest to the kernel +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -301,7 +301,7 @@ static u32 convert_skb_access(int skb_fi + break; + + case SKF_AD_PKTTYPE: +- *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET()); ++ *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET); + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX); + #ifdef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5); +@@ -323,7 +323,7 @@ static u32 convert_skb_access(int skb_fi + offsetof(struct sk_buff, vlan_tci)); + break; + case SKF_AD_VLAN_TAG_PRESENT: +- *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET()); ++ *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET); + if (PKT_VLAN_PRESENT_BIT) + *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT); + if (PKT_VLAN_PRESENT_BIT < 7) +@@ -8046,7 +8046,7 @@ static int bpf_unclone_prologue(struct b + * (Fast-path, otherwise approximation that we might be + * a clone, do the rest in helper.) 
+ */ +- *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET()); ++ *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET); + *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK); + *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7); + +@@ -8634,7 +8634,7 @@ static u32 bpf_convert_ctx_access(enum b + case offsetof(struct __sk_buff, pkt_type): + *target_size = 1; + *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, +- PKT_TYPE_OFFSET()); ++ PKT_TYPE_OFFSET); + *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX); + #ifdef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5); +@@ -8659,7 +8659,7 @@ static u32 bpf_convert_ctx_access(enum b + case offsetof(struct __sk_buff, vlan_present): + *target_size = 1; + *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, +- PKT_VLAN_PRESENT_OFFSET()); ++ PKT_VLAN_PRESENT_OFFSET); + if (PKT_VLAN_PRESENT_BIT) + *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT); + if (PKT_VLAN_PRESENT_BIT < 7) diff --git a/patches.suse/squashfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch b/patches.suse/squashfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch new file mode 100644 index 0000000..15a7ce6 --- /dev/null +++ b/patches.suse/squashfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch @@ -0,0 +1,45 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:23 +0200 +Subject: [PATCH] squashfs: use bdev_nr_bytes instead of open coding it +Git-commit: be9a7b3e15916fd3710bfd383e8ecffc0416e919 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Acked-by: Phillip Lougher +Link: https://lore.kernel.org/r/20211018101130.1838532-24-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/squashfs/super.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c +index 60d6951915f4..bb44ff4c5cc6 100644 +--- a/fs/squashfs/super.c ++++ b/fs/squashfs/super.c +@@ -16,6 +16,7 @@ + + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + ++#include + #include + #include + #include +@@ -179,8 +180,8 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) + /* Check the filesystem does not extend beyond the end of the + block device */ + msblk->bytes_used = le64_to_cpu(sblk->bytes_used); +- if (msblk->bytes_used < 0 || msblk->bytes_used > +- i_size_read(sb->s_bdev->bd_inode)) ++ if (msblk->bytes_used < 0 || ++ msblk->bytes_used > bdev_nr_bytes(sb->s_bdev)) + goto failed_mount; + + /* Check block size for sanity */ +-- +2.35.3 + diff --git a/patches.suse/suse-hv-guest-os-id.patch b/patches.suse/suse-hv-guest-os-id.patch index 8326bb2..5ec3ed7 100644 --- a/patches.suse/suse-hv-guest-os-id.patch +++ b/patches.suse/suse-hv-guest-os-id.patch @@ -9,18 +9,13 @@ Provide the guest OS id. A better change is pending. Acked-by: Olaf Hering ---- - arch/x86/hyperv/hv_init.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - ---- a/arch/x86/hyperv/hv_init.c -+++ b/arch/x86/hyperv/hv_init.c -@@ -420,7 +420,7 @@ void __init hyperv_init(void) - * 1. Register the guest ID - * 2. 
Enable the hypercall and register the hypercall page - */ -- guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); -+ guest_id = generate_guest_id(0x10 /* SUSE */, LINUX_VERSION_CODE, 0 /* -d of a.b.c-d */); - wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); +--- a/include/asm-generic/mshyperv.h ++++ b/include/asm-generic/mshyperv.h +@@ -110,6 +110,7 @@ static inline u64 hv_generate_guest_id(u + u64 guest_id; + + guest_id = (((u64)HV_LINUX_VENDOR_ID) << 48); ++ guest_id |= 0x10ULL /* SUSE */ << 48; + guest_id |= (kernel_version << 16); - /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */ + return guest_id; diff --git a/patches.suse/swim-add-a-floppy-registration-bool-which-triggers-d.patch b/patches.suse/swim-add-a-floppy-registration-bool-which-triggers-d.patch new file mode 100644 index 0000000..1827539 --- /dev/null +++ b/patches.suse/swim-add-a-floppy-registration-bool-which-triggers-d.patch @@ -0,0 +1,68 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:02:57 -0700 +Subject: [PATCH] swim: add a floppy registration bool which triggers +Git-commit: 9ef41effb9b65088053e31c741c2a1ec97190117 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + del_gendisk() + +Instead of calling del_gendisk() on exit alone, let's add +a registration bool to the floppy disk state, this way this can +be done on the shared caller, swim_cleanup_floppy_disk(). + +This will be more useful in subsequent patches. Right now, this +just shuffles functionality out to a helper in a safe way. + +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20210927220302.1073499-10-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/swim.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/drivers/block/swim.c b/drivers/block/swim.c +index 4f87d1af7c60..eed453528f4c 100644 +--- a/drivers/block/swim.c ++++ b/drivers/block/swim.c +@@ -185,6 +185,7 @@ struct floppy_state { + + int track; + int ref_count; ++ bool registered; + + struct gendisk *disk; + struct blk_mq_tag_set tag_set; +@@ -779,6 +780,9 @@ static void swim_cleanup_floppy_disk(struct floppy_state *fs) + if (!disk) + return; + ++ if (fs->registered) ++ del_gendisk(fs->disk); ++ + blk_cleanup_disk(disk); + blk_mq_free_tag_set(&fs->tag_set); + } +@@ -840,6 +844,7 @@ static int swim_floppy_init(struct swim_priv *swd) + swd->unit[drive].disk->private_data = &swd->unit[drive]; + set_capacity(swd->unit[drive].disk, 2880); + add_disk(swd->unit[drive].disk); ++ swd->unit[drive].registered = true; + } + + return 0; +@@ -916,10 +921,8 @@ static int swim_remove(struct platform_device *dev) + int drive; + struct resource *res; + +- for (drive = 0; drive < swd->floppy_count; drive++) { +- del_gendisk(swd->unit[drive].disk); ++ for (drive = 0; drive < swd->floppy_count; drive++) + swim_cleanup_floppy_disk(&swd->unit[drive]); +- } + + unregister_blkdev(FLOPPY_MAJOR, "fd"); + +-- +2.35.3 + diff --git a/patches.suse/swim-add-error-handling-support-for-add_disk.patch b/patches.suse/swim-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..9f5485e --- /dev/null +++ b/patches.suse/swim-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,42 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:02:58 -0700 +Subject: [PATCH] swim: add error handling support for add_disk() +Git-commit: 625a28a7e60c7c026e4d2929c49c8461fad4b0f3 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. 
Now that this is fixed, use the shiny new +error handling. + +Since we have a caller to do our unwinding for the disk, +and this is already dealt with safely, we can re-use our +existing error path goto label which already deals with +the cleanup. + +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20210927220302.1073499-11-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/swim.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/block/swim.c b/drivers/block/swim.c +index eed453528f4c..821594cd1315 100644 +--- a/drivers/block/swim.c ++++ b/drivers/block/swim.c +@@ -843,7 +843,9 @@ static int swim_floppy_init(struct swim_priv *swd) + swd->unit[drive].disk->events = DISK_EVENT_MEDIA_CHANGE; + swd->unit[drive].disk->private_data = &swd->unit[drive]; + set_capacity(swd->unit[drive].disk, 2880); +- add_disk(swd->unit[drive].disk); ++ err = add_disk(swd->unit[drive].disk); ++ if (err) ++ goto exit_put_disks; + swd->unit[drive].registered = true; + } + +-- +2.35.3 + diff --git a/patches.suse/swim-add-helper-for-disk-cleanup.patch b/patches.suse/swim-add-helper-for-disk-cleanup.patch new file mode 100644 index 0000000..908b676 --- /dev/null +++ b/patches.suse/swim-add-helper-for-disk-cleanup.patch @@ -0,0 +1,72 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:02:56 -0700 +Subject: [PATCH] swim: add helper for disk cleanup +Git-commit: 4e9abe72530a2baf5f80d60e8d0bcdb84964d2e4 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Disk cleanup can be shared between exit and bringup. Use a +helper to do the work required. The only functional change at +this point is we're being overly paranoid on exit to check for +a null disk as well now, and this should be safe. + +We'll expand on this later; this change just makes subsequent +changes easier to read.
+ +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20210927220302.1073499-9-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/swim.c | 21 +++++++++++++-------- + 1 file changed, 13 insertions(+), 8 deletions(-) + +diff --git a/drivers/block/swim.c b/drivers/block/swim.c +index 868d59476065..4f87d1af7c60 100644 +--- a/drivers/block/swim.c ++++ b/drivers/block/swim.c +@@ -772,6 +772,17 @@ static const struct blk_mq_ops swim_mq_ops = { + .queue_rq = swim_queue_rq, + }; + ++static void swim_cleanup_floppy_disk(struct floppy_state *fs) ++{ ++ struct gendisk *disk = fs->disk; ++ ++ if (!disk) ++ return; ++ ++ blk_cleanup_disk(disk); ++ blk_mq_free_tag_set(&fs->tag_set); ++} ++ + static int swim_floppy_init(struct swim_priv *swd) + { + int err; +@@ -836,12 +847,7 @@ static int swim_floppy_init(struct swim_priv *swd) + exit_put_disks: + unregister_blkdev(FLOPPY_MAJOR, "fd"); + do { +- struct gendisk *disk = swd->unit[drive].disk; +- +- if (!disk) +- continue; +- blk_cleanup_disk(disk); +- blk_mq_free_tag_set(&swd->unit[drive].tag_set); ++ swim_cleanup_floppy_disk(&swd->unit[drive]); + } while (drive--); + return err; + } +@@ -912,8 +918,7 @@ static int swim_remove(struct platform_device *dev) + + for (drive = 0; drive < swd->floppy_count; drive++) { + del_gendisk(swd->unit[drive].disk); +- blk_cleanup_disk(swd->unit[drive].disk); +- blk_mq_free_tag_set(&swd->unit[drive].tag_set); ++ swim_cleanup_floppy_disk(&swd->unit[drive]); + } + + unregister_blkdev(FLOPPY_MAJOR, "fd"); +-- +2.35.3 + diff --git a/patches.suse/swim-simplify-using-blk_cleanup_disk-on-swim_remove.patch b/patches.suse/swim-simplify-using-blk_cleanup_disk-on-swim_remove.patch new file mode 100644 index 0000000..469a198 --- /dev/null +++ b/patches.suse/swim-simplify-using-blk_cleanup_disk-on-swim_remove.patch @@ -0,0 +1,36 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:02:55 -0700 +Subject: [PATCH] swim: simplify using blk_cleanup_disk() on swim_remove() +Git-commit: b76a30c254d987b4ae7d47415081121d4c0a7423 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We can simplify swim_remove() by using one call instead of two, +just as other drivers do. Use that pattern. 
+ +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20210927220302.1073499-8-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/swim.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/block/swim.c b/drivers/block/swim.c +index 3911d0833e1b..868d59476065 100644 +--- a/drivers/block/swim.c ++++ b/drivers/block/swim.c +@@ -912,9 +912,8 @@ static int swim_remove(struct platform_device *dev) + + for (drive = 0; drive < swd->floppy_count; drive++) { + del_gendisk(swd->unit[drive].disk); +- blk_cleanup_queue(swd->unit[drive].disk->queue); ++ blk_cleanup_disk(swd->unit[drive].disk); + blk_mq_free_tag_set(&swd->unit[drive].tag_set); +- put_disk(swd->unit[drive].disk); + } + + unregister_blkdev(FLOPPY_MAJOR, "fd"); +-- +2.35.3 + diff --git a/patches.suse/swim3-add-missing-major.h-include.patch b/patches.suse/swim3-add-missing-major.h-include.patch new file mode 100644 index 0000000..3dd5ed1 --- /dev/null +++ b/patches.suse/swim3-add-missing-major.h-include.patch @@ -0,0 +1,35 @@ +From: Jens Axboe +Date: Fri, 1 Oct 2021 19:23:26 -0600 +Subject: [PATCH] swim3: add missing major.h include +Git-commit: 1f0a258f114b5b152855d31179f902cb10bdfb59 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +swim3 got this through blkdev.h previously, but blkdev.h is not including +it anymore. Include it specifically for the driver, otherwise FLOPPY_MAJOR +is undefined and breaks the compile on PPC if swim3 is configured. + +Fixes: b81e0c2372e6 ("block: drop unused includes in ") +Reported-by: Naresh Kamboju +Acked-by: Randy Dunlap # build-tested +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/swim3.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c +index f7e3482e846b..4b91c9aa5892 100644 +--- a/drivers/block/swim3.c ++++ b/drivers/block/swim3.c +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + #include + #include + #include +-- +2.35.3 + diff --git a/patches.suse/sx8-fix-an-error-code-in-carm_init_one.patch b/patches.suse/sx8-fix-an-error-code-in-carm_init_one.patch new file mode 100644 index 0000000..770753b --- /dev/null +++ b/patches.suse/sx8-fix-an-error-code-in-carm_init_one.patch @@ -0,0 +1,38 @@ +From: Dan Carpenter +Date: Fri, 1 Oct 2021 15:27:22 +0300 +Subject: [PATCH] sx8: fix an error code in carm_init_one() +Git-commit: 5deae20c552aec0750cc7b95e84ca94121aac3b3 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Return a negative error code here on this error path instead of +returning success. 
+ +Fixes: 637208e74a86 ("block/sx8: add error handling support for add_disk()") +Signed-off-by: Dan Carpenter +Link: https://lore.kernel.org/r/20211001122722.GC2283@kili +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/sx8.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c +index 1c79248c4826..d1676fe0da1a 100644 +--- a/drivers/block/sx8.c ++++ b/drivers/block/sx8.c +@@ -1511,8 +1511,10 @@ static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) + DPRINTK("waiting for probe_comp\n"); + host->probe_err = -ENODEV; + wait_for_completion(&host->probe_comp); +- if (host->probe_err) ++ if (host->probe_err) { ++ rc = host->probe_err; + goto err_out_free_irq; ++ } + + printk(KERN_INFO "%s: pci %s, ports %d, io %llx, irq %u, major %d\n", + host->name, pci_name(pdev), (int) CARM_MAX_PORTS, +-- +2.35.3 + diff --git a/patches.suse/target-iblock-use-bdev_nr_bytes-instead-of-open-codi.patch b/patches.suse/target-iblock-use-bdev_nr_bytes-instead-of-open-codi.patch new file mode 100644 index 0000000..89fe024 --- /dev/null +++ b/patches.suse/target-iblock-use-bdev_nr_bytes-instead-of-open-codi.patch @@ -0,0 +1,38 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:08 +0200 +Subject: [PATCH] target/iblock: use bdev_nr_bytes instead of open coding it +Git-commit: 64f0f42671b48ec30a3203818e26346d5b4ea5fa +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the proper helper to read the block device size. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Reviewed-by: Chaitanya Kulkarni +Link: https://lore.kernel.org/r/20211018101130.1838532-9-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/target/target_core_iblock.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c +index 31df20abe141..b1ef041cacd8 100644 +--- a/drivers/target/target_core_iblock.c ++++ b/drivers/target/target_core_iblock.c +@@ -232,9 +232,9 @@ static unsigned long long iblock_emulate_read_cap_with_block_size( + struct block_device *bd, + struct request_queue *q) + { +- unsigned long long blocks_long = (div_u64(i_size_read(bd->bd_inode), +- bdev_logical_block_size(bd)) - 1); + u32 block_size = bdev_logical_block_size(bd); ++ unsigned long long blocks_long = ++ div_u64(bdev_nr_bytes(bd), block_size) - 1; + + if (block_size == dev->dev_attrib.block_size) + return blocks_long; +-- +2.35.3 + diff --git a/patches.suse/tcp-make-tcp_read_sock-more-robust.patch b/patches.suse/tcp-make-tcp_read_sock-more-robust.patch new file mode 100644 index 0000000..9c034bc --- /dev/null +++ b/patches.suse/tcp-make-tcp_read_sock-more-robust.patch @@ -0,0 +1,43 @@ +From: Eric Dumazet +Date: Wed, 2 Mar 2022 08:17:23 -0800 +Subject: tcp: make tcp_read_sock() more robust +Patch-mainline: v5.17-rc7 +Git-commit: e3d5ea2c011ecb16fb94c56a659364e6b30fac94 +References: jsc#PED-1368 + +If recv_actor() returns an incorrect value, tcp_read_sock() +might loop forever. + +Instead, issue a one time warning and make sure to make progress. 
+ +Signed-off-by: Eric Dumazet +Acked-by: John Fastabend +Acked-by: Jakub Sitnicki +Acked-by: Daniel Borkmann +Link: https://lore.kernel.org/r/20220302161723.3910001-2-eric.dumazet@gmail.com +Signed-off-by: Jakub Kicinski +Acked-by: Shung-Hsi Yu +--- + net/ipv4/tcp.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1694,11 +1694,13 @@ int tcp_read_sock(struct sock *sk, read_ + if (!copied) + copied = used; + break; +- } else if (used <= len) { +- seq += used; +- copied += used; +- offset += used; + } ++ if (WARN_ON_ONCE(used > len)) ++ used = len; ++ seq += used; ++ copied += used; ++ offset += used; ++ + /* If recv_actor drops the lock (e.g. TCP splice + * receive) the skb pointer might be invalid when + * getting here: tcp_collapse might have deleted it diff --git a/patches.suse/thunderbolt-Check-router-generation-before-connectin.patch b/patches.suse/thunderbolt-Check-router-generation-before-connectin.patch new file mode 100644 index 0000000..7ae995d --- /dev/null +++ b/patches.suse/thunderbolt-Check-router-generation-before-connectin.patch @@ -0,0 +1,49 @@ +From 93a3c0d4e8bfbb15145e5dd7da68a3de4b904aba Mon Sep 17 00:00:00 2001 +From: Mika Westerberg +Date: Tue, 14 Jun 2022 18:53:59 +0300 +Subject: [PATCH] thunderbolt: Check router generation before connecting xHCI +Git-commit: 93a3c0d4e8bfbb15145e5dd7da68a3de4b904aba +Patch-mainline: v6.0-rc4 +References: git-fixes + +Only Thunderbolt 3 routers need the xHCI connection flow. This also +ensures the router actually has both lane adapters (1 and 3). While +there move declaration of the boolean variables inside the block where +they are being used. + +Fixes: 30a4eca69b76 ("thunderbolt: Add internal xHCI connect flows for Thunderbolt 3 devices") +Cc: stable@vger.kernel.org +Signed-off-by: Mika Westerberg +Acked-by: Takashi Iwai + +--- + drivers/thunderbolt/switch.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c +index 244f8cd38b25..c63c1f4ff9dc 100644 +--- a/drivers/thunderbolt/switch.c ++++ b/drivers/thunderbolt/switch.c +@@ -3786,14 +3786,18 @@ int tb_switch_pcie_l1_enable(struct tb_switch *sw) + */ + int tb_switch_xhci_connect(struct tb_switch *sw) + { +- bool usb_port1, usb_port3, xhci_port1, xhci_port3; + struct tb_port *port1, *port3; + int ret; + ++ if (sw->generation != 3) ++ return 0; ++ + port1 = &sw->ports[1]; + port3 = &sw->ports[3]; + + if (tb_switch_is_alpine_ridge(sw)) { ++ bool usb_port1, usb_port3, xhci_port1, xhci_port3; ++ + usb_port1 = tb_lc_is_usb_plugged(port1); + usb_port3 = tb_lc_is_usb_plugged(port3); + xhci_port1 = tb_lc_is_xhci_connected(port1); +-- +2.35.3 + diff --git a/patches.suse/tools-Help-cross-building-with-clang.patch b/patches.suse/tools-Help-cross-building-with-clang.patch new file mode 100644 index 0000000..d087e69 --- /dev/null +++ b/patches.suse/tools-Help-cross-building-with-clang.patch @@ -0,0 +1,62 @@ +From: Jean-Philippe Brucker +Date: Thu, 16 Dec 2021 16:38:38 +0000 +Subject: tools: Help cross-building with clang +Patch-mainline: v5.17-rc1 +Git-commit: cebdb7374577ac6e14afb11311af8c2c44a259fa +References: jsc#PED-1368 + +Cross-compilation with clang uses the -target parameter rather than a +toolchain prefix. Just like the kernel Makefile, add that parameter to +CFLAGS when CROSS_COMPILE is set. + +Unlike the kernel Makefile, we use the --sysroot and --gcc-toolchain +options because unlike the kernel, tools require standard libraries. 
+Commit c91d4e47e10e ("Makefile: Remove '--gcc-toolchain' flag") provides +some background about --gcc-toolchain. Normally clang finds on its own +the additional utilities and libraries that it needs (for example GNU ld +or glibc). On some systems however, this autodetection doesn't work. +There, our only recourse is asking GCC directly, and pass the result to +--sysroot and --gcc-toolchain. Of course that only works when a cross +GCC is available. + +Autodetection worked fine on Debian, but to use the aarch64-linux-gnu +toolchain from Archlinux I needed both --sysroot (for crt1.o) and +--gcc-toolchain (for crtbegin.o, -lgcc). The --prefix parameter wasn't +needed there, but it might be useful on other distributions. + +Use the CLANG_CROSS_FLAGS variable instead of CLANG_FLAGS because it +allows tools such as bpftool, that need to build both host and target +binaries, to easily filter out the cross-build flags from CFLAGS. + +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Andrii Nakryiko +Acked-by: Quentin Monnet +Acked-by: Nick Desaulniers +Link: https://lore.kernel.org/bpf/20211216163842.829836-2-jean-philippe@linaro.org +Acked-by: Shung-Hsi Yu +--- + tools/scripts/Makefile.include | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/tools/scripts/Makefile.include ++++ b/tools/scripts/Makefile.include +@@ -87,7 +87,18 @@ LLVM_STRIP ?= llvm-strip + + ifeq ($(CC_NO_CLANG), 1) + EXTRA_WARNINGS += -Wstrict-aliasing=3 +-endif ++ ++else ifneq ($(CROSS_COMPILE),) ++CLANG_CROSS_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%)) ++GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)gcc)) ++ifneq ($(GCC_TOOLCHAIN_DIR),) ++CLANG_CROSS_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) ++CLANG_CROSS_FLAGS += --sysroot=$(shell $(CROSS_COMPILE)gcc -print-sysroot) ++CLANG_CROSS_FLAGS += --gcc-toolchain=$(realpath $(GCC_TOOLCHAIN_DIR)/..) ++endif # GCC_TOOLCHAIN_DIR ++CFLAGS += $(CLANG_CROSS_FLAGS) ++AFLAGS += $(CLANG_CROSS_FLAGS) ++endif # CROSS_COMPILE + + # Hack to avoid type-punned warnings on old systems such as RHEL5: + # We should be changing CFLAGS and checking gcc version, but this diff --git a/patches.suse/tools-bpf-Rename-struct-event-to-avoid-naming-confli.patch b/patches.suse/tools-bpf-Rename-struct-event-to-avoid-naming-confli.patch new file mode 100644 index 0000000..587c2c0 --- /dev/null +++ b/patches.suse/tools-bpf-Rename-struct-event-to-avoid-naming-confli.patch @@ -0,0 +1,65 @@ +From: "Naveen N. Rao" +Date: Thu, 6 Jan 2022 17:15:08 +0530 +Subject: tools/bpf: Rename 'struct event' to avoid naming conflict +Patch-mainline: v5.17-rc2 +Git-commit: 88a71086c48ae98e93c0208044827621e9717f7e +References: jsc#PED-1368 + +On ppc64le, trying to build bpf seltests throws the below warning: + In file included from runqslower.bpf.c:5: + ./runqslower.h:7:8: error: redefinition of 'event' + struct event { + ^ + /home/naveen/linux/tools/testing/selftests/bpf/tools/build/runqslower/vmlinux.h:156602:8: + note: previous definition is here + struct event { + ^ + +This happens since 'struct event' is defined in +drivers/net/ethernet/alteon/acenic.h . Rename the one in runqslower to a +more appropriate 'runq_event' to avoid the naming conflict. + +Signed-off-by: Naveen N. 
Rao +Acked-by: Daniel Borkmann +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/c13cb3767d26257ca4387b8296b632b433a58db6.1641468127.git.naveen.n.rao@linux.vnet.ibm.com +Acked-by: Shung-Hsi Yu +--- + tools/bpf/runqslower/runqslower.bpf.c | 2 +- + tools/bpf/runqslower/runqslower.c | 2 +- + tools/bpf/runqslower/runqslower.h | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +--- a/tools/bpf/runqslower/runqslower.bpf.c ++++ b/tools/bpf/runqslower/runqslower.bpf.c +@@ -68,7 +68,7 @@ int handle__sched_switch(u64 *ctx) + */ + struct task_struct *prev = (struct task_struct *)ctx[1]; + struct task_struct *next = (struct task_struct *)ctx[2]; +- struct event event = {}; ++ struct runq_event event = {}; + u64 *tsp, delta_us; + long state; + u32 pid; +--- a/tools/bpf/runqslower/runqslower.c ++++ b/tools/bpf/runqslower/runqslower.c +@@ -100,7 +100,7 @@ static int bump_memlock_rlimit(void) + + void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) + { +- const struct event *e = data; ++ const struct runq_event *e = data; + struct tm *tm; + char ts[32]; + time_t t; +--- a/tools/bpf/runqslower/runqslower.h ++++ b/tools/bpf/runqslower/runqslower.h +@@ -4,7 +4,7 @@ + + #define TASK_COMM_LEN 16 + +-struct event { ++struct runq_event { + char task[TASK_COMM_LEN]; + __u64 delta_us; + pid_t pid; diff --git a/patches.suse/tools-perf-Stop-using-bpf_object__find_program_by_ti.patch b/patches.suse/tools-perf-Stop-using-bpf_object__find_program_by_ti.patch new file mode 100644 index 0000000..a691ae6 --- /dev/null +++ b/patches.suse/tools-perf-Stop-using-bpf_object__find_program_by_ti.patch @@ -0,0 +1,44 @@ +From: Kui-Feng Lee +Date: Mon, 13 Dec 2021 19:59:30 -0800 +Subject: tools/perf: Stop using bpf_object__find_program_by_title API. +Patch-mainline: v5.17-rc1 +Git-commit: b098f33692d75d184a3ab62095c376fd0e52d880 +References: jsc#PED-1368 + +bpf_object__find_program_by_title() in libbpf is going to be deprecated. +Call bpf_object__for_each_program() to find a program in the section with +a given name instead. + +Signed-off-by: Kui-Feng Lee +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211214035931.1148209-4-kuifeng@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/perf/builtin-trace.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/tools/perf/builtin-trace.c ++++ b/tools/perf/builtin-trace.c +@@ -3257,10 +3257,21 @@ static void trace__set_bpf_map_syscalls( + + static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name) + { ++ struct bpf_program *pos, *prog = NULL; ++ const char *sec_name; ++ + if (trace->bpf_obj == NULL) + return NULL; + +- return bpf_object__find_program_by_title(trace->bpf_obj, name); ++ bpf_object__for_each_program(pos, trace->bpf_obj) { ++ sec_name = bpf_program__section_name(pos); ++ if (sec_name && !strcmp(sec_name, name)) { ++ prog = pos; ++ break; ++ } ++ } ++ ++ return prog; + } + + static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc, diff --git a/patches.suse/tools-bpf-bpftool-skeleton-replace-bpf_probe_read_ke.patch b/patches.suse/tools-bpf-bpftool-skeleton-replace-bpf_probe_read_ke.patch new file mode 100644 index 0000000..3c80c64 --- /dev/null +++ b/patches.suse/tools-bpf-bpftool-skeleton-replace-bpf_probe_read_ke.patch @@ -0,0 +1,48 @@ +From: Yafang Shao +Date: Wed, 19 Jan 2022 18:08:36 -0800 +Subject: tools/bpf/bpftool/skeleton: replace bpf_probe_read_kernel with + bpf_probe_read_kernel_str to get task comm +Patch-mainline: v5.17-rc1 +Git-commit: 4cfb943537ed3716daf668ca5a33d3ce667f82a3 +References: jsc#PED-1368 + +bpf_probe_read_kernel_str() will add a nul terminator to the dst, so +we don't need to worry about whether the dst size is big enough.
+ +Link: https://lkml.kernel.org/r/20211120112738.45980-7-laoar.shao@gmail.com +Signed-off-by: Yafang Shao +Acked-by: Andrii Nakryiko +Reviewed-by: David Hildenbrand +Cc: Mathieu Desnoyers +Cc: Arnaldo Carvalho de Melo +Cc: Alexei Starovoitov +Cc: Andrii Nakryiko +Cc: Michal Miroslaw +Cc: Peter Zijlstra +Cc: Steven Rostedt +Cc: Matthew Wilcox +Cc: David Hildenbrand +Cc: Al Viro +Cc: Kees Cook +Cc: Petr Mladek +Cc: Dennis Dalessandro +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Acked-by: Shung-Hsi Yu +--- + tools/bpf/bpftool/skeleton/pid_iter.bpf.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c ++++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c +@@ -71,8 +71,8 @@ int iter(struct bpf_iter__task_file *ctx + + e.pid = task->tgid; + e.id = get_obj_id(file->private_data, obj_type); +- bpf_probe_read_kernel(&e.comm, sizeof(e.comm), +- task->group_leader->comm); ++ bpf_probe_read_kernel_str(&e.comm, sizeof(e.comm), ++ task->group_leader->comm); + bpf_seq_write(ctx->meta->seq, &e, sizeof(e)); + + return 0; diff --git a/patches.suse/tools-headers-UAPI-remove-stale-lirc.h.patch b/patches.suse/tools-headers-UAPI-remove-stale-lirc.h.patch new file mode 100644 index 0000000..b30cd1a --- /dev/null +++ b/patches.suse/tools-headers-UAPI-remove-stale-lirc.h.patch @@ -0,0 +1,265 @@ +From: Sean Young +Date: Mon, 24 Jan 2022 15:30:28 +0000 +Subject: tools headers UAPI: remove stale lirc.h +Patch-mainline: v5.17-rc3 +Git-commit: e2bcbd7769ee8f05e1b3d10848aace98973844e4 +References: jsc#PED-1368 + +The lirc.h file is an old copy of lirc.h from the kernel sources. It is +out of date, and the bpf lirc tests don't need a new copy anyway. As +long as /usr/include/linux/lirc.h is from kernel v5.2 or newer, the tests +will compile fine. 
+ +Signed-off-by: Sean Young +Reviewed-by: Shuah Khan +Link: https://lore.kernel.org/r/20220124153028.394409-1-sean@mess.org +Signed-off-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- + tools/include/uapi/linux/lirc.h | 229 --------------------- + tools/testing/selftests/bpf/test_lirc_mode2_user.c | 1 + 2 files changed, 230 deletions(-) + delete mode 100644 tools/include/uapi/linux/lirc.h + +--- a/tools/include/uapi/linux/lirc.h ++++ /dev/null +@@ -1,229 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +-/* +- * lirc.h - linux infrared remote control header file +- * last modified 2010/07/13 by Jarod Wilson +- */ +- +-#ifndef _LINUX_LIRC_H +-#define _LINUX_LIRC_H +- +-#include +-#include +- +-#define PULSE_BIT 0x01000000 +-#define PULSE_MASK 0x00FFFFFF +- +-#define LIRC_MODE2_SPACE 0x00000000 +-#define LIRC_MODE2_PULSE 0x01000000 +-#define LIRC_MODE2_FREQUENCY 0x02000000 +-#define LIRC_MODE2_TIMEOUT 0x03000000 +- +-#define LIRC_VALUE_MASK 0x00FFFFFF +-#define LIRC_MODE2_MASK 0xFF000000 +- +-#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE) +-#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE) +-#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY) +-#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT) +- +-#define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK) +-#define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK) +- +-#define LIRC_IS_SPACE(val) (LIRC_MODE2(val) == LIRC_MODE2_SPACE) +-#define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE) +-#define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY) +-#define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT) +- +-/* used heavily by lirc userspace */ +-#define lirc_t int +- +-/*** lirc compatible hardware features ***/ +- +-#define LIRC_MODE2SEND(x) (x) +-#define LIRC_SEND2MODE(x) (x) +-#define LIRC_MODE2REC(x) ((x) << 16) +-#define LIRC_REC2MODE(x) ((x) >> 16) +- +-#define LIRC_MODE_RAW 0x00000001 +-#define LIRC_MODE_PULSE 0x00000002 +-#define LIRC_MODE_MODE2 0x00000004 +-#define LIRC_MODE_SCANCODE 0x00000008 +-#define LIRC_MODE_LIRCCODE 0x00000010 +- +- +-#define LIRC_CAN_SEND_RAW LIRC_MODE2SEND(LIRC_MODE_RAW) +-#define LIRC_CAN_SEND_PULSE LIRC_MODE2SEND(LIRC_MODE_PULSE) +-#define LIRC_CAN_SEND_MODE2 LIRC_MODE2SEND(LIRC_MODE_MODE2) +-#define LIRC_CAN_SEND_LIRCCODE LIRC_MODE2SEND(LIRC_MODE_LIRCCODE) +- +-#define LIRC_CAN_SEND_MASK 0x0000003f +- +-#define LIRC_CAN_SET_SEND_CARRIER 0x00000100 +-#define LIRC_CAN_SET_SEND_DUTY_CYCLE 0x00000200 +-#define LIRC_CAN_SET_TRANSMITTER_MASK 0x00000400 +- +-#define LIRC_CAN_REC_RAW LIRC_MODE2REC(LIRC_MODE_RAW) +-#define LIRC_CAN_REC_PULSE LIRC_MODE2REC(LIRC_MODE_PULSE) +-#define LIRC_CAN_REC_MODE2 LIRC_MODE2REC(LIRC_MODE_MODE2) +-#define LIRC_CAN_REC_SCANCODE LIRC_MODE2REC(LIRC_MODE_SCANCODE) +-#define LIRC_CAN_REC_LIRCCODE LIRC_MODE2REC(LIRC_MODE_LIRCCODE) +- +-#define LIRC_CAN_REC_MASK LIRC_MODE2REC(LIRC_CAN_SEND_MASK) +- +-#define LIRC_CAN_SET_REC_CARRIER (LIRC_CAN_SET_SEND_CARRIER << 16) +-#define LIRC_CAN_SET_REC_DUTY_CYCLE (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16) +- +-#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000 +-#define LIRC_CAN_SET_REC_CARRIER_RANGE 0x80000000 +-#define LIRC_CAN_GET_REC_RESOLUTION 0x20000000 +-#define LIRC_CAN_SET_REC_TIMEOUT 0x10000000 +-#define LIRC_CAN_SET_REC_FILTER 0x08000000 +- +-#define LIRC_CAN_MEASURE_CARRIER 0x02000000 +-#define LIRC_CAN_USE_WIDEBAND_RECEIVER 0x04000000 +- +-#define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK) +-#define 
LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK) +- +-#define LIRC_CAN_NOTIFY_DECODE 0x01000000 +- +-/*** IOCTL commands for lirc driver ***/ +- +-#define LIRC_GET_FEATURES _IOR('i', 0x00000000, __u32) +- +-#define LIRC_GET_SEND_MODE _IOR('i', 0x00000001, __u32) +-#define LIRC_GET_REC_MODE _IOR('i', 0x00000002, __u32) +-#define LIRC_GET_REC_RESOLUTION _IOR('i', 0x00000007, __u32) +- +-#define LIRC_GET_MIN_TIMEOUT _IOR('i', 0x00000008, __u32) +-#define LIRC_GET_MAX_TIMEOUT _IOR('i', 0x00000009, __u32) +- +-/* code length in bits, currently only for LIRC_MODE_LIRCCODE */ +-#define LIRC_GET_LENGTH _IOR('i', 0x0000000f, __u32) +- +-#define LIRC_SET_SEND_MODE _IOW('i', 0x00000011, __u32) +-#define LIRC_SET_REC_MODE _IOW('i', 0x00000012, __u32) +-/* Note: these can reset the according pulse_width */ +-#define LIRC_SET_SEND_CARRIER _IOW('i', 0x00000013, __u32) +-#define LIRC_SET_REC_CARRIER _IOW('i', 0x00000014, __u32) +-#define LIRC_SET_SEND_DUTY_CYCLE _IOW('i', 0x00000015, __u32) +-#define LIRC_SET_TRANSMITTER_MASK _IOW('i', 0x00000017, __u32) +- +-/* +- * when a timeout != 0 is set the driver will send a +- * LIRC_MODE2_TIMEOUT data packet, otherwise LIRC_MODE2_TIMEOUT is +- * never sent, timeout is disabled by default +- */ +-#define LIRC_SET_REC_TIMEOUT _IOW('i', 0x00000018, __u32) +- +-/* 1 enables, 0 disables timeout reports in MODE2 */ +-#define LIRC_SET_REC_TIMEOUT_REPORTS _IOW('i', 0x00000019, __u32) +- +-/* +- * if enabled from the next key press on the driver will send +- * LIRC_MODE2_FREQUENCY packets +- */ +-#define LIRC_SET_MEASURE_CARRIER_MODE _IOW('i', 0x0000001d, __u32) +- +-/* +- * to set a range use LIRC_SET_REC_CARRIER_RANGE with the +- * lower bound first and later LIRC_SET_REC_CARRIER with the upper bound +- */ +-#define LIRC_SET_REC_CARRIER_RANGE _IOW('i', 0x0000001f, __u32) +- +-#define LIRC_SET_WIDEBAND_RECEIVER _IOW('i', 0x00000023, __u32) +- +-/* +- * Return the recording timeout, which is either set by +- * the ioctl LIRC_SET_REC_TIMEOUT or by the kernel after setting the protocols. +- */ +-#define LIRC_GET_REC_TIMEOUT _IOR('i', 0x00000024, __u32) +- +-/* +- * struct lirc_scancode - decoded scancode with protocol for use with +- * LIRC_MODE_SCANCODE +- * +- * @timestamp: Timestamp in nanoseconds using CLOCK_MONOTONIC when IR +- * was decoded. +- * @flags: should be 0 for transmit. When receiving scancodes, +- * LIRC_SCANCODE_FLAG_TOGGLE or LIRC_SCANCODE_FLAG_REPEAT can be set +- * depending on the protocol +- * @rc_proto: see enum rc_proto +- * @keycode: the translated keycode. Set to 0 for transmit. 
+- * @scancode: the scancode received or to be sent +- */ +-struct lirc_scancode { +- __u64 timestamp; +- __u16 flags; +- __u16 rc_proto; +- __u32 keycode; +- __u64 scancode; +-}; +- +-/* Set if the toggle bit of rc-5 or rc-6 is enabled */ +-#define LIRC_SCANCODE_FLAG_TOGGLE 1 +-/* Set if this is a nec or sanyo repeat */ +-#define LIRC_SCANCODE_FLAG_REPEAT 2 +- +-/** +- * enum rc_proto - the Remote Controller protocol +- * +- * @RC_PROTO_UNKNOWN: Protocol not known +- * @RC_PROTO_OTHER: Protocol known but proprietary +- * @RC_PROTO_RC5: Philips RC5 protocol +- * @RC_PROTO_RC5X_20: Philips RC5x 20 bit protocol +- * @RC_PROTO_RC5_SZ: StreamZap variant of RC5 +- * @RC_PROTO_JVC: JVC protocol +- * @RC_PROTO_SONY12: Sony 12 bit protocol +- * @RC_PROTO_SONY15: Sony 15 bit protocol +- * @RC_PROTO_SONY20: Sony 20 bit protocol +- * @RC_PROTO_NEC: NEC protocol +- * @RC_PROTO_NECX: Extended NEC protocol +- * @RC_PROTO_NEC32: NEC 32 bit protocol +- * @RC_PROTO_SANYO: Sanyo protocol +- * @RC_PROTO_MCIR2_KBD: RC6-ish MCE keyboard +- * @RC_PROTO_MCIR2_MSE: RC6-ish MCE mouse +- * @RC_PROTO_RC6_0: Philips RC6-0-16 protocol +- * @RC_PROTO_RC6_6A_20: Philips RC6-6A-20 protocol +- * @RC_PROTO_RC6_6A_24: Philips RC6-6A-24 protocol +- * @RC_PROTO_RC6_6A_32: Philips RC6-6A-32 protocol +- * @RC_PROTO_RC6_MCE: MCE (Philips RC6-6A-32 subtype) protocol +- * @RC_PROTO_SHARP: Sharp protocol +- * @RC_PROTO_XMP: XMP protocol +- * @RC_PROTO_CEC: CEC protocol +- * @RC_PROTO_IMON: iMon Pad protocol +- * @RC_PROTO_RCMM12: RC-MM protocol 12 bits +- * @RC_PROTO_RCMM24: RC-MM protocol 24 bits +- * @RC_PROTO_RCMM32: RC-MM protocol 32 bits +- */ +-enum rc_proto { +- RC_PROTO_UNKNOWN = 0, +- RC_PROTO_OTHER = 1, +- RC_PROTO_RC5 = 2, +- RC_PROTO_RC5X_20 = 3, +- RC_PROTO_RC5_SZ = 4, +- RC_PROTO_JVC = 5, +- RC_PROTO_SONY12 = 6, +- RC_PROTO_SONY15 = 7, +- RC_PROTO_SONY20 = 8, +- RC_PROTO_NEC = 9, +- RC_PROTO_NECX = 10, +- RC_PROTO_NEC32 = 11, +- RC_PROTO_SANYO = 12, +- RC_PROTO_MCIR2_KBD = 13, +- RC_PROTO_MCIR2_MSE = 14, +- RC_PROTO_RC6_0 = 15, +- RC_PROTO_RC6_6A_20 = 16, +- RC_PROTO_RC6_6A_24 = 17, +- RC_PROTO_RC6_6A_32 = 18, +- RC_PROTO_RC6_MCE = 19, +- RC_PROTO_SHARP = 20, +- RC_PROTO_XMP = 21, +- RC_PROTO_CEC = 22, +- RC_PROTO_IMON = 23, +- RC_PROTO_RCMM12 = 24, +- RC_PROTO_RCMM24 = 25, +- RC_PROTO_RCMM32 = 26, +-}; +- +-#endif +--- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c ++++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c +@@ -28,7 +28,6 @@ + // 5. We can read keycode from same /dev/lirc device + + #include +-#include + #include + #include + #include diff --git a/patches.suse/tools-libbpf-Enable-cross-building-with-clang.patch b/patches.suse/tools-libbpf-Enable-cross-building-with-clang.patch new file mode 100644 index 0000000..5cf90db --- /dev/null +++ b/patches.suse/tools-libbpf-Enable-cross-building-with-clang.patch @@ -0,0 +1,39 @@ +From: Jean-Philippe Brucker +Date: Thu, 16 Dec 2021 16:38:40 +0000 +Subject: tools/libbpf: Enable cross-building with clang +Patch-mainline: v5.17-rc1 +Git-commit: 4980beb4cda2bc413a3a044e1851b0daaf137bf6 +References: jsc#PED-1368 + +Cross-building using clang requires passing the "-target" flag rather +than using the CROSS_COMPILE prefix. Makefile.include transforms +CROSS_COMPILE into CLANG_CROSS_FLAGS. Add them to the CFLAGS. 
+ +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Andrii Nakryiko +Acked-by: Quentin Monnet +Link: https://lore.kernel.org/bpf/20211216163842.829836-4-jean-philippe@linaro.org +Acked-by: Shung-Hsi Yu +--- + tools/lib/bpf/Makefile | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/tools/lib/bpf/Makefile ++++ b/tools/lib/bpf/Makefile +@@ -90,6 +90,7 @@ override CFLAGS += -Werror -Wall + override CFLAGS += $(INCLUDES) + override CFLAGS += -fvisibility=hidden + override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 ++override CFLAGS += $(CLANG_CROSS_FLAGS) + + # flags specific for shared library + SHLIB_FLAGS := -DSHARED -fPIC +@@ -162,7 +163,7 @@ $(BPF_HELPER_DEFS): $(srctree)/tools/inc + $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) + + $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(VERSION_SCRIPT) +- $(QUIET_LINK)$(CC) $(LDFLAGS) \ ++ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) \ + --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ + -Wl,--version-script=$(VERSION_SCRIPT) $< -lelf -lz -o $@ + @ln -sf $(@F) $(OUTPUT)libbpf.so diff --git a/patches.suse/tools-perf-Stop-using-bpf_object__find_program_by_ti.patch b/patches.suse/tools-perf-Stop-using-bpf_object__find_program_by_ti.patch new file mode 100644 index 0000000..a691ae6 --- /dev/null +++ b/patches.suse/tools-perf-Stop-using-bpf_object__find_program_by_ti.patch @@ -0,0 +1,44 @@ +From: Kui-Feng Lee +Date: Mon, 13 Dec 2021 19:59:30 -0800 +Subject: tools/perf: Stop using bpf_object__find_program_by_title API. +Patch-mainline: v5.17-rc1 +Git-commit: b098f33692d75d184a3ab62095c376fd0e52d880 +References: jsc#PED-1368 + +bpf_obj__find_program_by_title() in libbpf is going to be deprecated. +Call bpf_object_for_each_program to find a program in the section with +a given name instead. + +Signed-off-by: Kui-Feng Lee +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20211214035931.1148209-4-kuifeng@fb.com +Acked-by: Shung-Hsi Yu +--- + tools/perf/builtin-trace.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +--- a/tools/perf/builtin-trace.c ++++ b/tools/perf/builtin-trace.c +@@ -3257,10 +3257,21 @@ static void trace__set_bpf_map_syscalls( + + static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name) + { ++ struct bpf_program *pos, *prog = NULL; ++ const char *sec_name; ++ + if (trace->bpf_obj == NULL) + return NULL; + +- return bpf_object__find_program_by_title(trace->bpf_obj, name); ++ bpf_object__for_each_program(pos, trace->bpf_obj) { ++ sec_name = bpf_program__section_name(pos); ++ if (sec_name && !strcmp(sec_name, name)) { ++ prog = pos; ++ break; ++ } ++ } ++ ++ return prog; + } + + static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc, diff --git a/patches.suse/tools-resolve_btf_ids-Close-ELF-file-on-error.patch b/patches.suse/tools-resolve_btf_ids-Close-ELF-file-on-error.patch new file mode 100644 index 0000000..cd14f96 --- /dev/null +++ b/patches.suse/tools-resolve_btf_ids-Close-ELF-file-on-error.patch @@ -0,0 +1,39 @@ +From: Andrii Nakryiko +Date: Tue, 23 Nov 2021 16:23:13 -0800 +Subject: tools/resolve_btf_ids: Close ELF file on error +Patch-mainline: v5.17-rc1 +Git-commit: 1144ab9bdf3430e1b5b3f22741e5283841951add +References: jsc#PED-1368 + +Fix one case where we don't do explicit clean up. 
+ +Fixes: fbbb68de80a4 ("bpf: Add resolve_btfids tool to resolve BTF IDs in ELF object") +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20211124002325.1737739-2-andrii@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/bpf/resolve_btfids/main.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/tools/bpf/resolve_btfids/main.c ++++ b/tools/bpf/resolve_btfids/main.c +@@ -168,7 +168,7 @@ static struct btf_id *btf_id__find(struc + return NULL; + } + +-static struct btf_id* ++static struct btf_id * + btf_id__add(struct rb_root *root, char *name, bool unique) + { + struct rb_node **p = &root->rb_node; +@@ -732,7 +732,8 @@ int main(int argc, const char **argv) + if (obj.efile.idlist_shndx == -1 || + obj.efile.symbols_shndx == -1) { + pr_debug("Cannot find .BTF_ids or symbols sections, nothing to do\n"); +- return 0; ++ err = 0; ++ goto out; + } + + if (symbols_collect(&obj)) diff --git a/patches.suse/tools-resolve_btfids-Do-not-print-any-commands-when-.patch b/patches.suse/tools-resolve_btfids-Do-not-print-any-commands-when-.patch new file mode 100644 index 0000000..158c896 --- /dev/null +++ b/patches.suse/tools-resolve_btfids-Do-not-print-any-commands-when-.patch @@ -0,0 +1,43 @@ +From: Nathan Chancellor +Date: Tue, 1 Feb 2022 14:25:04 -0700 +Subject: tools/resolve_btfids: Do not print any commands when building + silently +Patch-mainline: v5.17-rc3 +Git-commit: 7f3bdbc3f13146eb9d07de81ea71f551587a384b +References: jsc#PED-1368 + +When building with 'make -s', there is some output from resolve_btfids: + +$ make -sj"$(nproc)" oldconfig prepare + MKDIR .../tools/bpf/resolve_btfids/libbpf/ + MKDIR .../tools/bpf/resolve_btfids//libsubcmd + LINK resolve_btfids + +Silent mode means that no information should be emitted about what is +currently being done. Use the $(silent) variable from Makefile.include +to avoid defining the msg macro so that there is no information printed. + +Fixes: fbbb68de80a4 ("bpf: Add resolve_btfids tool to resolve BTF IDs in ELF object") +Signed-off-by: Nathan Chancellor +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20220201212503.731732-1-nathan@kernel.org +Acked-by: Shung-Hsi Yu +--- + tools/bpf/resolve_btfids/Makefile | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/tools/bpf/resolve_btfids/Makefile ++++ b/tools/bpf/resolve_btfids/Makefile +@@ -9,7 +9,11 @@ ifeq ($(V),1) + msg = + else + Q = @ +- msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; ++ ifeq ($(silent),1) ++ msg = ++ else ++ msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; ++ endif + MAKEFLAGS=--no-print-directory + endif + diff --git a/patches.suse/tools-resolve_btfids-Support-cross-building-the-kern.patch b/patches.suse/tools-resolve_btfids-Support-cross-building-the-kern.patch new file mode 100644 index 0000000..4d8fbba --- /dev/null +++ b/patches.suse/tools-resolve_btfids-Support-cross-building-the-kern.patch @@ -0,0 +1,34 @@ +From: Jean-Philippe Brucker +Date: Thu, 16 Dec 2021 16:38:39 +0000 +Subject: tools/resolve_btfids: Support cross-building the kernel with clang +Patch-mainline: v5.17-rc1 +Git-commit: bf1be903461a404a9d1c720b0872501ca35abc89 +References: jsc#PED-1368 + +The CROSS_COMPILE variable may be present during resolve_btfids build if +the kernel is being cross-built. Since resolve_btfids is always executed +on the host, we set CC to HOSTCC in order to use the host toolchain when +cross-building with GCC. 
But instead of a toolchain prefix, cross-build +with clang uses a "-target" parameter, which Makefile.include deduces +from the CROSS_COMPILE variable. In order to avoid cross-building +libbpf, clear CROSS_COMPILE before building resolve_btfids. + +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Andrii Nakryiko +Acked-by: Quentin Monnet +Link: https://lore.kernel.org/bpf/20211216163842.829836-3-jean-philippe@linaro.org +Acked-by: Shung-Hsi Yu +--- + tools/bpf/resolve_btfids/Makefile | 1 + + 1 file changed, 1 insertion(+) + +--- a/tools/bpf/resolve_btfids/Makefile ++++ b/tools/bpf/resolve_btfids/Makefile +@@ -19,6 +19,7 @@ CC = $(HOSTCC) + LD = $(HOSTLD) + ARCH = $(HOSTARCH) + RM ?= rm ++CROSS_COMPILE = + + OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/ + diff --git a/patches.suse/tools-runqslower-Enable-cross-building-with-clang.patch b/patches.suse/tools-runqslower-Enable-cross-building-with-clang.patch new file mode 100644 index 0000000..7c5d2b7 --- /dev/null +++ b/patches.suse/tools-runqslower-Enable-cross-building-with-clang.patch @@ -0,0 +1,39 @@ +From: Jean-Philippe Brucker +Date: Thu, 16 Dec 2021 16:38:42 +0000 +Subject: tools/runqslower: Enable cross-building with clang +Patch-mainline: v5.17-rc1 +Git-commit: bb7b75e860eec31aa67b83935849fdc46418c13e +References: jsc#PED-1368 + +Cross-building using clang requires passing the "-target" flag rather +than using the CROSS_COMPILE prefix. Makefile.include transforms +CROSS_COMPILE into CLANG_CROSS_FLAGS. Add them to CFLAGS, and erase +CROSS_COMPILE for the bpftool build, since it needs to be executed on +the host. + +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Andrii Nakryiko +Acked-by: Quentin Monnet +Link: https://lore.kernel.org/bpf/20211216163842.829836-6-jean-philippe@linaro.org +Acked-by: Shung-Hsi Yu +--- + tools/bpf/runqslower/Makefile | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/tools/bpf/runqslower/Makefile ++++ b/tools/bpf/runqslower/Makefile +@@ -12,7 +12,7 @@ BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a + BPF_DESTDIR := $(BPFOBJ_OUTPUT) + BPF_INCLUDE := $(BPF_DESTDIR)/include + INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../include/uapi) +-CFLAGS := -g -Wall ++CFLAGS := -g -Wall $(CLANG_CROSS_FLAGS) + + # Try to detect best kernel BTF source + KERNEL_REL := $(shell uname -r) +@@ -88,4 +88,4 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[c + + $(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \ +- CC=$(HOSTCC) LD=$(HOSTLD) ++ ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) diff --git a/patches.suse/tools-runqslower-Update-perf_buffer__new-calls.patch b/patches.suse/tools-runqslower-Update-perf_buffer__new-calls.patch new file mode 100644 index 0000000..807f951 --- /dev/null +++ b/patches.suse/tools-runqslower-Update-perf_buffer__new-calls.patch @@ -0,0 +1,40 @@ +From: Andrii Nakryiko +Date: Wed, 10 Nov 2021 21:36:23 -0800 +Subject: tools/runqslower: Update perf_buffer__new() calls +Patch-mainline: v5.17-rc1 +Git-commit: eda8bfa5b7c76d332ece1f24a3662ca843fd880a +References: jsc#PED-1368 + +Use v1.0+ compatible variant of perf_buffer__new() call to prepare for +deprecation. 
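+
+For context, the two calling conventions differ roughly as follows
+(signatures condensed from libbpf's headers; treat them as illustrative):
+
+	/* deprecated style: callbacks passed via perf_buffer_opts */
+	struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
+					     const struct perf_buffer_opts *opts);
+
+	/* v1.0+ style: callbacks and ctx are explicit arguments */
+	struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
+					     perf_buffer_sample_fn sample_cb,
+					     perf_buffer_lost_fn lost_cb, void *ctx,
+					     const struct perf_buffer_opts *opts);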
+
+Signed-off-by: Andrii Nakryiko
+Signed-off-by: Alexei Starovoitov
+Link: https://lore.kernel.org/bpf/20211111053624.190580-9-andrii@kernel.org
+Acked-by: Shung-Hsi Yu
+---
+ tools/bpf/runqslower/runqslower.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/tools/bpf/runqslower/runqslower.c
++++ b/tools/bpf/runqslower/runqslower.c
+@@ -123,7 +123,6 @@ int main(int argc, char **argv)
+ 		.parser = parse_arg,
+ 		.doc = argp_program_doc,
+ 	};
+-	struct perf_buffer_opts pb_opts;
+ 	struct perf_buffer *pb = NULL;
+ 	struct runqslower_bpf *obj;
+ 	int err;
+@@ -165,9 +164,8 @@ int main(int argc, char **argv)
+ 	printf("Tracing run queue latency higher than %llu us\n", env.min_us);
+ 	printf("%-8s %-16s %-6s %14s\n", "TIME", "COMM", "PID", "LAT(us)");
+
+-	pb_opts.sample_cb = handle_event;
+-	pb_opts.lost_cb = handle_lost_events;
+-	pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, &pb_opts);
++	pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64,
++			      handle_event, handle_lost_events, NULL, NULL);
+ 	err = libbpf_get_error(pb);
+ 	if (err) {
+ 		pb = NULL;
diff --git a/patches.suse/tracing-Fix-memory-leak-in-test_gen_synth_cmd-and-test_empty_synth_event.patch b/patches.suse/tracing-Fix-memory-leak-in-test_gen_synth_cmd-and-test_empty_synth_event.patch
new file mode 100644
index 0000000..a2b0173
--- /dev/null
+++ b/patches.suse/tracing-Fix-memory-leak-in-test_gen_synth_cmd-and-test_empty_synth_event.patch
@@ -0,0 +1,100 @@
+From: Shang XiaoJing
+Date: Thu, 17 Nov 2022 09:23:45 +0800
+Subject: tracing: Fix memory leak in test_gen_synth_cmd() and
+ test_empty_synth_event()
+Git-commit: a4527fef9afe5c903c718d0cd24609fe9c754250
+Patch-mainline: v6.1-rc6
+References: git-fixes
+
+test_gen_synth_cmd() only frees buf in the failure path, so buf leaks
+when there is no failure. Add kfree(buf) to prevent the memleak. The
+same reasoning and fix apply to test_empty_synth_event().
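+
+In effect, both the success path and the error path now funnel through
+a single free label, roughly (condensed from the hunks below):
+
+	ret = synth_event_trace_array(gen_synth_test, vals, ARRAY_SIZE(vals));
+ free:
+	kfree(buf);	/* reached on success and on failure */
+	return ret;
+ delete:
+	synth_event_delete("gen_synth_test");
+	goto free;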
+
+unreferenced object 0xffff8881127de000 (size 2048):
+  comm "modprobe", pid 247, jiffies 4294972316 (age 78.756s)
+  hex dump (first 32 bytes):
+    20 67 65 6e 5f 73 79 6e 74 68 5f 74 65 73 74 20  gen_synth_test
+    20 70 69 64 5f 74 20 6e 65 78 74 5f 70 69 64 5f  pid_t next_pid_
+  backtrace:
+    [<000000004254801a>] kmalloc_trace+0x26/0x100
+    [<0000000039eb1cf5>] 0xffffffffa00083cd
+    [<000000000e8c3bc8>] 0xffffffffa00086ba
+    [<00000000c293d1ea>] do_one_initcall+0xdb/0x480
+    [<00000000aa189e6d>] do_init_module+0x1cf/0x680
+    [<00000000d513222b>] load_module+0x6a50/0x70a0
+    [<000000001fd4d529>] __do_sys_finit_module+0x12f/0x1c0
+    [<00000000b36c4c0f>] do_syscall_64+0x3f/0x90
+    [<00000000bbf20cf3>] entry_SYSCALL_64_after_hwframe+0x63/0xcd
+unreferenced object 0xffff8881127df000 (size 2048):
+  comm "modprobe", pid 247, jiffies 4294972324 (age 78.728s)
+  hex dump (first 32 bytes):
+    20 65 6d 70 74 79 5f 73 79 6e 74 68 5f 74 65 73  empty_synth_tes
+    74 20 20 70 69 64 5f 74 20 6e 65 78 74 5f 70 69  t  pid_t next_pi
+  backtrace:
+    [<000000004254801a>] kmalloc_trace+0x26/0x100
+    [<00000000d4db9a3d>] 0xffffffffa0008071
+    [<00000000c31354a5>] 0xffffffffa00086ce
+    [<00000000c293d1ea>] do_one_initcall+0xdb/0x480
+    [<00000000aa189e6d>] do_init_module+0x1cf/0x680
+    [<00000000d513222b>] load_module+0x6a50/0x70a0
+    [<000000001fd4d529>] __do_sys_finit_module+0x12f/0x1c0
+    [<00000000b36c4c0f>] do_syscall_64+0x3f/0x90
+    [<00000000bbf20cf3>] entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Link: https://lkml.kernel.org/r/20221117012346.22647-2-shangxiaojing@huawei.com
+
+Cc:
+Cc:
+Cc:
+Cc: stable@vger.kernel.org
+Fixes: 9fe41efaca08 ("tracing: Add synth event generation test module")
+Signed-off-by: Shang XiaoJing
+Signed-off-by: Steven Rostedt (Google)
+Acked-by: Petr Pavlu
+---
+ kernel/trace/synth_event_gen_test.c | 16 ++++++----------
+ 1 file changed, 6 insertions(+), 10 deletions(-)
+
+diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c
+index 0b15e975d2c2..8d77526892f4 100644
+--- a/kernel/trace/synth_event_gen_test.c
++++ b/kernel/trace/synth_event_gen_test.c
+@@ -120,15 +120,13 @@ static int __init test_gen_synth_cmd(void)
+
+ 	/* Now generate a gen_synth_test event */
+ 	ret = synth_event_trace_array(gen_synth_test, vals, ARRAY_SIZE(vals));
+- out:
++ free:
++	kfree(buf);
+ 	return ret;
+ delete:
+ 	/* We got an error after creating the event, delete it */
+ 	synth_event_delete("gen_synth_test");
+- free:
+-	kfree(buf);
+-
+-	goto out;
++	goto free;
+ }
+
+ /*
+@@ -227,15 +225,13 @@ static int __init test_empty_synth_event(void)
+
+ 	/* Now trace an empty_synth_test event */
+ 	ret = synth_event_trace_array(empty_synth_test, vals, ARRAY_SIZE(vals));
+- out:
++ free:
++	kfree(buf);
+ 	return ret;
+ delete:
+ 	/* We got an error after creating the event, delete it */
+ 	synth_event_delete("empty_synth_test");
+- free:
+-	kfree(buf);
+-
+-	goto out;
++	goto free;
+ }
+
+ static struct synth_field_desc create_synth_test_fields[] = {
+
diff --git a/patches.suse/tracing-Fix-memory-leak-in-tracing_read_pipe.patch b/patches.suse/tracing-Fix-memory-leak-in-tracing_read_pipe.patch
new file mode 100644
index 0000000..ec51cda
--- /dev/null
+++ b/patches.suse/tracing-Fix-memory-leak-in-tracing_read_pipe.patch
@@ -0,0 +1,57 @@
+From: Wang Yufen
+Date: Mon, 7 Nov 2022 19:04:50 +0800
+Subject: tracing: Fix memory leak in tracing_read_pipe()
+Git-commit: 649e72070cbbb8600eb823833e4748f5a0815116
+Patch-mainline: v6.1-rc6
+References: git-fixes
+
+kmemleak reports this issue:
+
+unreferenced object
0xffff888105a18900 (size 128):
+  comm "test_progs", pid 18933, jiffies 4336275356 (age 22801.766s)
+  hex dump (first 32 bytes):
+    25 73 00 90 81 88 ff ff 26 05 00 00 42 01 58 04  %s......&...B.X.
+    03 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00  ................
+  backtrace:
+    [<00000000560143a1>] __kmalloc_node_track_caller+0x4a/0x140
+    [<000000006af00822>] krealloc+0x8d/0xf0
+    [<00000000c309be6a>] trace_iter_expand_format+0x99/0x150
+    [<000000005a53bdb6>] trace_check_vprintf+0x1e0/0x11d0
+    [<0000000065629d9d>] trace_event_printf+0xb6/0xf0
+    [<000000009a690dc7>] trace_raw_output_bpf_trace_printk+0x89/0xc0
+    [<00000000d22db172>] print_trace_line+0x73c/0x1480
+    [<00000000cdba76ba>] tracing_read_pipe+0x45c/0x9f0
+    [<0000000015b58459>] vfs_read+0x17b/0x7c0
+    [<000000004aeee8ed>] ksys_read+0xed/0x1c0
+    [<0000000063d3d898>] do_syscall_64+0x3b/0x90
+    [<00000000a06dda7f>] entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+iter->fmt is allocated in tracing_read_pipe() -> ... ->
+trace_iter_expand_format() but never freed. To fix this, free
+iter->fmt in tracing_release_pipe().
+
+Link: https://lkml.kernel.org/r/1667819090-4643-1-git-send-email-wangyufen@huawei.com
+
+Cc: stable@vger.kernel.org
+Fixes: efbbdaa22bb7 ("tracing: Show real address for trace event arguments")
+Acked-by: Masami Hiramatsu (Google)
+Signed-off-by: Wang Yufen
+Signed-off-by: Steven Rostedt (Google)
+Acked-by: Petr Pavlu
+---
+ kernel/trace/trace.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index c6c7a0af3ed2..5bd202d6d79a 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -6657,6 +6657,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
+ 	mutex_unlock(&trace_types_lock);
+
+ 	free_cpumask_var(iter->started);
++	kfree(iter->fmt);
+ 	mutex_destroy(&iter->mutex);
+ 	kfree(iter);
+
+
diff --git a/patches.suse/tracing-Fix-wild-memory-access-in-register_synth_event.patch b/patches.suse/tracing-Fix-wild-memory-access-in-register_synth_event.patch
new file mode 100644
index 0000000..3acf54f
--- /dev/null
+++ b/patches.suse/tracing-Fix-wild-memory-access-in-register_synth_event.patch
@@ -0,0 +1,95 @@
+From: Shang XiaoJing
+Date: Thu, 17 Nov 2022 09:23:46 +0800
+Subject: tracing: Fix wild-memory-access in register_synth_event()
+Git-commit: 1b5f1c34d3f5a664a57a5a7557a50e4e3cc2505c
+Patch-mainline: v6.1-rc6
+References: git-fixes
+
+In register_synth_event(), if set_synth_event_print_fmt() fails, then
+both trace_remove_event_call() and unregister_trace_event() will be
+called, which means __unregister_trace_event() runs twice for the same
+trace_event_call. As a result, the second unregister causes the
+wild-memory-access.
+
+register_synth_event
+    set_synth_event_print_fmt failed
+        trace_remove_event_call
+            event_remove
+                if call->event.funcs then
+                    __unregister_trace_event (first call)
+        unregister_trace_event
+            __unregister_trace_event (second call)
+
+Fix the bug by not calling the second __unregister_trace_event() when
+the first one has already been called.
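+
+Condensed, the fixed error handling looks like this (simplified from
+the hunk below):
+
+	ret = set_synth_event_print_fmt(call);
+	/* trace_remove_event_call() unregisters the trace event itself */
+	if (ret < 0)
+		trace_remove_event_call(call);
+	/* no goto err here: the err path would call
+	 * unregister_trace_event() a second time */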
+
+general protection fault, probably for non-canonical address
+	0xfbd59c0000000024: 0000 [#1] SMP KASAN PTI
+KASAN: maybe wild-memory-access in range
+[0xdead000000000120-0xdead000000000127]
+CPU: 0 PID: 3807 Comm: modprobe Not tainted
+6.1.0-rc1-00186-g76f33a7eedb4 #299
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
+rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
+RIP: 0010:unregister_trace_event+0x6e/0x280
+Code: 00 fc ff df 4c 89 ea 48 c1 ea 03 80 3c 02 00 0f 85 0e 02 00 00 48
+b8 00 00 00 00 00 fc ff df 4c 8b 63 08 4c 89 e2 48 c1 ea 03 <80> 3c 02
+00 0f 85 e2 01 00 00 49 89 2c 24 48 85 ed 74 28 e8 7a 9b
+RSP: 0018:ffff88810413f370 EFLAGS: 00010a06
+RAX: dffffc0000000000 RBX: ffff888105d050b0 RCX: 0000000000000000
+RDX: 1bd5a00000000024 RSI: ffff888119e276e0 RDI: ffffffff835a8b20
+RBP: dead000000000100 R08: 0000000000000000 R09: fffffbfff0913481
+R10: ffffffff8489a407 R11: fffffbfff0913480 R12: dead000000000122
+R13: ffff888105d050b8 R14: 0000000000000000 R15: ffff888105d05028
+FS: 00007f7823e8d540(0000) GS:ffff888119e00000(0000)
+knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f7823e7ebec CR3: 000000010a058002 CR4: 0000000000330ef0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+
+ __create_synth_event+0x1e37/0x1eb0
+ create_or_delete_synth_event+0x110/0x250
+ synth_event_run_command+0x2f/0x110
+ test_gen_synth_cmd+0x170/0x2eb [synth_event_gen_test]
+ synth_event_gen_test_init+0x76/0x9bc [synth_event_gen_test]
+ do_one_initcall+0xdb/0x480
+ do_init_module+0x1cf/0x680
+ load_module+0x6a50/0x70a0
+ __do_sys_finit_module+0x12f/0x1c0
+ do_syscall_64+0x3f/0x90
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Link: https://lkml.kernel.org/r/20221117012346.22647-3-shangxiaojing@huawei.com
+
+Fixes: 4b147936fa50 ("tracing: Add support for 'synthetic' events")
+Signed-off-by: Shang XiaoJing
+Cc: stable@vger.kernel.org
+Cc:
+Cc:
+Cc:
+Signed-off-by: Steven Rostedt (Google)
+Acked-by: Petr Pavlu
+---
+ kernel/trace/trace_events_synth.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
+index e310052dc83c..29fbfb27c2b2 100644
+--- a/kernel/trace/trace_events_synth.c
++++ b/kernel/trace/trace_events_synth.c
+@@ -828,10 +828,9 @@ static int register_synth_event(struct synth_event *event)
+ 	}
+
+ 	ret = set_synth_event_print_fmt(call);
+-	if (ret < 0) {
++	/* unregister_trace_event() will be called inside */
++	if (ret < 0)
+ 		trace_remove_event_call(call);
+-		goto err;
+-	}
+ out:
+ 	return ret;
+ err:
+
diff --git a/patches.suse/tracing-kprobe-Fix-potential-null-ptr-deref-on-trace_array-in-kprobe_event_gen_test_exit.patch b/patches.suse/tracing-kprobe-Fix-potential-null-ptr-deref-on-trace_array-in-kprobe_event_gen_test_exit.patch
new file mode 100644
index 0000000..1497515
--- /dev/null
+++ b/patches.suse/tracing-kprobe-Fix-potential-null-ptr-deref-on-trace_array-in-kprobe_event_gen_test_exit.patch
@@ -0,0 +1,85 @@
+From: Shang XiaoJing
+Date: Fri, 18 Nov 2022 10:15:34 +0900
+Subject: tracing: kprobe: Fix potential null-ptr-deref on trace_array in
+ kprobe_event_gen_test_exit()
+Git-commit: 22ea4ca9631eb137e64e5ab899e9c89cb6670959
+Patch-mainline: v6.1-rc6
+References: git-fixes
+
+When test_gen_kprobe_cmd() fails after kprobe_event_gen_cmd_end(), it
+jumps to the delete label, which calls kprobe_event_delete() and
+releases the corresponding resource. However, the trace_array in
+gen_kretprobe_test will point to the invalid resource. Set
+gen_kretprobe_test to NULL after calling kprobe_event_delete() to
+prevent the null-ptr-deref.
+
+BUG: kernel NULL pointer dereference, address: 0000000000000070
+PGD 0 P4D 0
+Oops: 0000 [#1] SMP PTI
+CPU: 0 PID: 246 Comm: modprobe Tainted: G W
+6.1.0-rc1-00174-g9522dc5c87da-dirty #248
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
+rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
+RIP: 0010:__ftrace_set_clr_event_nolock+0x53/0x1b0
+Code: e8 82 26 fc ff 49 8b 1e c7 44 24 0c ea ff ff ff 49 39 de 0f 84 3c
+01 00 00 c7 44 24 18 00 00 00 00 e8 61 26 fc ff 48 8b 6b 10 <44> 8b 65
+70 4c 8b 6d 18 41 f7 c4 00 02 00 00 75 2f
+RSP: 0018:ffffc9000159fe00 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: ffff88810971d268 RCX: 0000000000000000
+RDX: ffff8881080be600 RSI: ffffffff811b48ff RDI: ffff88810971d058
+RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000001
+R10: ffffc9000159fe58 R11: 0000000000000001 R12: ffffffffa0001064
+R13: ffffffffa000106c R14: ffff88810971d238 R15: 0000000000000000
+FS: 00007f89eeff6540(0000) GS:ffff88813b600000(0000)
+knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000000070 CR3: 000000010599e004 CR4: 0000000000330ef0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+
+ __ftrace_set_clr_event+0x3e/0x60
+ trace_array_set_clr_event+0x35/0x50
+ ? 0xffffffffa0000000
+ kprobe_event_gen_test_exit+0xcd/0x10b [kprobe_event_gen_test]
+ __x64_sys_delete_module+0x206/0x380
+ ? lockdep_hardirqs_on_prepare+0xd8/0x190
+ ? syscall_enter_from_user_mode+0x1c/0x50
+ do_syscall_64+0x3f/0x90
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+RIP: 0033:0x7f89eeb061b7
+
+Link: https://lore.kernel.org/all/20221108015130.28326-3-shangxiaojing@huawei.com/
+
+Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module")
+Signed-off-by: Shang XiaoJing
+Cc: stable@vger.kernel.org
+Acked-by: Masami Hiramatsu (Google)
+Signed-off-by: Masami Hiramatsu (Google)
+Acked-by: Petr Pavlu
+---
+ kernel/trace/kprobe_event_gen_test.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c
+index 1c98fafcf333..c736487fc0e4 100644
+--- a/kernel/trace/kprobe_event_gen_test.c
++++ b/kernel/trace/kprobe_event_gen_test.c
+@@ -143,6 +143,8 @@ static int __init test_gen_kprobe_cmd(void)
+ 	kfree(buf);
+ 	return ret;
+ delete:
++	if (trace_event_file_is_valid(gen_kprobe_test))
++		gen_kprobe_test = NULL;
+ 	/* We got an error after creating the event, delete it */
+ 	ret = kprobe_event_delete("gen_kprobe_test");
+ 	goto out;
+@@ -206,6 +208,8 @@ static int __init test_gen_kretprobe_cmd(void)
+ 	kfree(buf);
+ 	return ret;
+ delete:
++	if (trace_event_file_is_valid(gen_kretprobe_test))
++		gen_kretprobe_test = NULL;
+ 	/* We got an error after creating the event, delete it */
+ 	ret = kprobe_event_delete("gen_kretprobe_test");
+ 	goto out;
+
diff --git a/patches.suse/tracing-kprobe-Fix-potential-null-ptr-deref-on-trace_event_file-in-kprobe_event_gen_test_exit.patch b/patches.suse/tracing-kprobe-Fix-potential-null-ptr-deref-on-trace_event_file-in-kprobe_event_gen_test_exit.patch
new file mode 100644
index 0000000..8baba41
--- /dev/null
+++ b/patches.suse/tracing-kprobe-Fix-potential-null-ptr-deref-on-trace_event_file-in-kprobe_event_gen_test_exit.patch
@@ -0,0 +1,131 @@
+From: Shang XiaoJing
+Date: Fri, 18 Nov 2022 10:15:33 +0900
+Subject: tracing: kprobe: Fix potential null-ptr-deref on trace_event_file in
+ kprobe_event_gen_test_exit()
+Git-commit: e0d75267f59d7084e0468bd68beeb1bf9c71d7c0
+Patch-mainline: v6.1-rc6
+References: git-fixes
+
+When trace_get_event_file() fails, gen_kretprobe_test will be assigned
+the error code. If the kprobe_event_gen_test module is removed at that
+point, a null pointer dereference will happen in
+kprobe_event_gen_test_exit(). Check whether gen_kprobe_test or
+gen_kretprobe_test is an error code or NULL before dereferencing them.
+
+BUG: kernel NULL pointer dereference, address: 0000000000000012
+PGD 0 P4D 0
+Oops: 0000 [#1] SMP PTI
+CPU: 3 PID: 2210 Comm: modprobe Not tainted
+6.1.0-rc1-00171-g2159299a3b74-dirty #217
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
+rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
+RIP: 0010:kprobe_event_gen_test_exit+0x1c/0xb5 [kprobe_event_gen_test]
+Code: Unable to access opcode bytes at 0xffffffff9ffffff2.
+RSP: 0018:ffffc900015bfeb8 EFLAGS: 00010246
+RAX: ffffffffffffffea RBX: ffffffffa0002080 RCX: 0000000000000000
+RDX: ffffffffa0001054 RSI: ffffffffa0001064 RDI: ffffffffdfc6349c
+RBP: ffffffffa0000000 R08: 0000000000000004 R09: 00000000001e95c0
+R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000800
+R13: ffffffffa0002420 R14: 0000000000000000 R15: 0000000000000000
+FS: 00007f56b75be540(0000) GS:ffff88813bc00000(0000)
+knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffffffff9ffffff2 CR3: 000000010874a006 CR4: 0000000000330ee0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+
+ __x64_sys_delete_module+0x206/0x380
+ ? lockdep_hardirqs_on_prepare+0xd8/0x190
+ ?
syscall_enter_from_user_mode+0x1c/0x50 + do_syscall_64+0x3f/0x90 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Link: https://lore.kernel.org/all/20221108015130.28326-2-shangxiaojing@huawei.com/ + +Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module") +Signed-off-by: Shang XiaoJing +Acked-by: Masami Hiramatsu (Google) +Cc: stable@vger.kernel.org +Signed-off-by: Masami Hiramatsu (Google) +Acked-by: Petr Pavlu +--- + kernel/trace/kprobe_event_gen_test.c | 44 +++++++++++++++++++++++------------- + 1 file changed, 28 insertions(+), 16 deletions(-) + +diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c +index d81f7c51025c..1c98fafcf333 100644 +--- a/kernel/trace/kprobe_event_gen_test.c ++++ b/kernel/trace/kprobe_event_gen_test.c +@@ -73,6 +73,10 @@ static struct trace_event_file *gen_kretprobe_test; + #define KPROBE_GEN_TEST_ARG3 NULL + #endif + ++static bool trace_event_file_is_valid(struct trace_event_file *input) ++{ ++ return input && !IS_ERR(input); ++} + + /* + * Test to make sure we can create a kprobe event, then add more +@@ -217,10 +221,12 @@ static int __init kprobe_event_gen_test_init(void) + + ret = test_gen_kretprobe_cmd(); + if (ret) { +- WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, +- "kprobes", +- "gen_kretprobe_test", false)); +- trace_put_event_file(gen_kretprobe_test); ++ if (trace_event_file_is_valid(gen_kretprobe_test)) { ++ WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, ++ "kprobes", ++ "gen_kretprobe_test", false)); ++ trace_put_event_file(gen_kretprobe_test); ++ } + WARN_ON(kprobe_event_delete("gen_kretprobe_test")); + } + +@@ -229,24 +235,30 @@ static int __init kprobe_event_gen_test_init(void) + + static void __exit kprobe_event_gen_test_exit(void) + { +- /* Disable the event or you can't remove it */ +- WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, +- "kprobes", +- "gen_kprobe_test", false)); ++ if (trace_event_file_is_valid(gen_kprobe_test)) { ++ /* Disable the event or you can't remove it */ ++ WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, ++ "kprobes", ++ "gen_kprobe_test", false)); ++ ++ /* Now give the file and instance back */ ++ trace_put_event_file(gen_kprobe_test); ++ } + +- /* Now give the file and instance back */ +- trace_put_event_file(gen_kprobe_test); + + /* Now unregister and free the event */ + WARN_ON(kprobe_event_delete("gen_kprobe_test")); + +- /* Disable the event or you can't remove it */ +- WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, +- "kprobes", +- "gen_kretprobe_test", false)); ++ if (trace_event_file_is_valid(gen_kretprobe_test)) { ++ /* Disable the event or you can't remove it */ ++ WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, ++ "kprobes", ++ "gen_kretprobe_test", false)); ++ ++ /* Now give the file and instance back */ ++ trace_put_event_file(gen_kretprobe_test); ++ } + +- /* Now give the file and instance back */ +- trace_put_event_file(gen_kretprobe_test); + + /* Now unregister and free the event */ + WARN_ON(kprobe_event_delete("gen_kretprobe_test")); + diff --git a/patches.suse/tracing-ring-buffer-Have-polling-block-on-watermark.patch b/patches.suse/tracing-ring-buffer-Have-polling-block-on-watermark.patch new file mode 100644 index 0000000..e29561d --- /dev/null +++ b/patches.suse/tracing-ring-buffer-Have-polling-block-on-watermark.patch @@ -0,0 +1,192 @@ +From: "Steven Rostedt (Google)" +Date: Thu, 20 Oct 2022 23:14:27 -0400 +Subject: tracing/ring-buffer: Have polling block on watermark +Git-commit: 
42fb0a1e84ff525ebe560e2baf9451ab69127e2b +Patch-mainline: v6.1-rc6 +References: git-fixes + +Currently the way polling works on the ring buffer is broken. It will +return immediately if there's any data in the ring buffer whereas a read +will block until the watermark (defined by the tracefs buffer_percent file) +is hit. + +That is, a select() or poll() will return as if there's data available, +but then the following read will block. This is broken for the way +select()s and poll()s are supposed to work. + +Have the polling on the ring buffer also block the same way reads and +splice does on the ring buffer. + +Link: https://lkml.kernel.org/r/20221020231427.41be3f26@gandalf.local.home + +Cc: Linux Trace Kernel +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Cc: Primiano Tucci +Cc: stable@vger.kernel.org +Fixes: 1e0d6714aceb7 ("ring-buffer: Do not wake up a splice waiter when page is not full") +Signed-off-by: Steven Rostedt (Google) +Acked-by: Petr Pavlu +--- + include/linux/ring_buffer.h | 2 +- + kernel/trace/ring_buffer.c | 55 +++++++++++++++++++++++++++++---------------- + kernel/trace/trace.c | 2 +- + 3 files changed, 38 insertions(+), 21 deletions(-) + +diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h +index 2504df9a0453..3c7d295746f6 100644 +--- a/include/linux/ring_buffer.h ++++ b/include/linux/ring_buffer.h +@@ -100,7 +100,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k + + int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); + __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, +- struct file *filp, poll_table *poll_table); ++ struct file *filp, poll_table *poll_table, int full); + void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu); + + #define RING_BUFFER_ALL_CPUS -1 +diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c +index 9712083832f4..089b1ec9cb3b 100644 +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -907,6 +907,21 @@ size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) + return cnt - read; + } + ++static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int full) ++{ ++ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; ++ size_t nr_pages; ++ size_t dirty; ++ ++ nr_pages = cpu_buffer->nr_pages; ++ if (!nr_pages || !full) ++ return true; ++ ++ dirty = ring_buffer_nr_dirty_pages(buffer, cpu); ++ ++ return (dirty * 100) > (full * nr_pages); ++} ++ + /* + * rb_wake_up_waiters - wake up tasks waiting for ring buffer input + * +@@ -1046,22 +1061,20 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) + !ring_buffer_empty_cpu(buffer, cpu)) { + unsigned long flags; + bool pagebusy; +- size_t nr_pages; +- size_t dirty; ++ bool done; + + if (!full) + break; + + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; +- nr_pages = cpu_buffer->nr_pages; +- dirty = ring_buffer_nr_dirty_pages(buffer, cpu); ++ done = !pagebusy && full_hit(buffer, cpu, full); ++ + if (!cpu_buffer->shortest_full || + cpu_buffer->shortest_full > full) + cpu_buffer->shortest_full = full; + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); +- if (!pagebusy && +- (!nr_pages || (dirty * 100) > full * nr_pages)) ++ if (done) + break; + } + +@@ -1087,6 +1100,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) + * @cpu: the cpu buffer to wait on + * @filp: the file descriptor + * @poll_table: The poll 
descriptor ++ * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS + * + * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon + * as data is added to any of the @buffer's cpu buffers. Otherwise +@@ -1096,14 +1110,15 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) + * zero otherwise. + */ + __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, +- struct file *filp, poll_table *poll_table) ++ struct file *filp, poll_table *poll_table, int full) + { + struct ring_buffer_per_cpu *cpu_buffer; + struct rb_irq_work *work; + +- if (cpu == RING_BUFFER_ALL_CPUS) ++ if (cpu == RING_BUFFER_ALL_CPUS) { + work = &buffer->irq_work; +- else { ++ full = 0; ++ } else { + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return -EINVAL; + +@@ -1111,8 +1126,14 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, + work = &cpu_buffer->irq_work; + } + +- poll_wait(filp, &work->waiters, poll_table); +- work->waiters_pending = true; ++ if (full) { ++ poll_wait(filp, &work->full_waiters, poll_table); ++ work->full_waiters_pending = true; ++ } else { ++ poll_wait(filp, &work->waiters, poll_table); ++ work->waiters_pending = true; ++ } ++ + /* + * There's a tight race between setting the waiters_pending and + * checking if the ring buffer is empty. Once the waiters_pending bit +@@ -1128,6 +1149,9 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, + */ + smp_mb(); + ++ if (full) ++ return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0; ++ + if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || + (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) + return EPOLLIN | EPOLLRDNORM; +@@ -3155,10 +3179,6 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, + static __always_inline void + rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) + { +- size_t nr_pages; +- size_t dirty; +- size_t full; +- + if (buffer->irq_work.waiters_pending) { + buffer->irq_work.waiters_pending = false; + /* irq_work_queue() supplies it's own memory barriers */ +@@ -3182,10 +3202,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) + + cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched); + +- full = cpu_buffer->shortest_full; +- nr_pages = cpu_buffer->nr_pages; +- dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu); +- if (full && nr_pages && (dirty * 100) <= full * nr_pages) ++ if (!full_hit(buffer, cpu_buffer->cpu, cpu_buffer->shortest_full)) + return; + + cpu_buffer->irq_work.wakeup_full = true; +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 47a44b055a1d..c6c7a0af3ed2 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -6681,7 +6681,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl + return EPOLLIN | EPOLLRDNORM; + else + return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, +- filp, poll_table); ++ filp, poll_table, iter->tr->buffer_percent); + } + + static __poll_t + diff --git a/patches.suse/treewide-Add-missing-includes-masked-by-cgroup-bpf-d.patch b/patches.suse/treewide-Add-missing-includes-masked-by-cgroup-bpf-d.patch new file mode 100644 index 0000000..1a1c1c3 --- /dev/null +++ b/patches.suse/treewide-Add-missing-includes-masked-by-cgroup-bpf-d.patch @@ -0,0 +1,225 @@ +From: Jakub Kicinski +Date: Thu, 2 Dec 2021 12:34:00 -0800 +Subject: treewide: Add missing includes masked by cgroup 
-> bpf dependency +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.16-rc5 +Git-commit: 8581fd402a0cf80b5298e3b225e7a7bd8f110e69 +References: jsc#PED-1368 +X-info: ignore changes in mm/damon/vaddr.c which is not backported, missing commit 3f49584b262c "mm/damon: implement primitives for the virtual memory address spaces" +X-info: contexnt changes in drivers/gpu/drm/msm/msm_gem_shrinker.c since commit 89e56d5ed1f7 "drm/msm: Fix missing include files in msm_gem_shrinker.c" is not backported +X-info: ignore changes in drivers/pci/controller/dwc/pcie-qcom-ep.c which is not backported, missing commit f55fee56a631 "PCI: qcom-ep: Add Qualcomm PCIe Endpoint controller driver" + +cgroup.h (therefore swap.h, therefore half of the universe) +includes bpf.h which in turn includes module.h and slab.h. +Since we're about to get rid of that dependency we need +to clean things up. + +v2: drop the cpu.h include from cacheinfo.h, it's not necessary +and it makes riscv sensitive to ordering of include files. + +Signed-off-by: Jakub Kicinski +Signed-off-by: Alexei Starovoitov +Reviewed-by: Christoph Hellwig +Acked-by: Krzysztof Wilczyński +Acked-by: Peter Chen +Acked-by: SeongJae Park +Acked-by: Jani Nikula +Acked-by: Greg Kroah-Hartman +Link: https://lore.kernel.org/all/20211120035253.72074-1-kuba@kernel.org/ # v1 +Link: https://lore.kernel.org/all/20211120165528.197359-1-kuba@kernel.org/ # cacheinfo discussion +Link: https://lore.kernel.org/bpf/20211202203400.1208663-1-kuba@kernel.org +Acked-by: Shung-Hsi Yu +--- +syu: excluded change in mm/damon/vaddr.c and +drivers/pci/controller/dwc/pcie-qcom-ep.c since they don't exist. +--- + block/fops.c | 1 + + drivers/gpu/drm/drm_gem_shmem_helper.c | 1 + + drivers/gpu/drm/i915/gt/intel_gtt.c | 1 + + drivers/gpu/drm/i915/i915_request.c | 1 + + drivers/gpu/drm/lima/lima_device.c | 1 + + drivers/gpu/drm/msm/msm_gem_shrinker.c | 2 ++ + drivers/gpu/drm/ttm/ttm_tt.c | 1 + + drivers/net/ethernet/huawei/hinic/hinic_sriov.c | 1 + + drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c | 2 ++ + drivers/pci/controller/dwc/pci-exynos.c | 1 + + drivers/usb/cdns3/host.c | 1 + + include/linux/cacheinfo.h | 1 - + include/linux/device/driver.h | 1 + + include/linux/filter.h | 2 +- + mm/memory_hotplug.c | 1 + + mm/swap_slots.c | 1 + + 16 files changed, 17 insertions(+), 2 deletions(-) + +--- a/block/fops.c ++++ b/block/fops.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include "blk.h" + + static inline struct inode *bdev_file_inode(struct file *file) +--- a/drivers/gpu/drm/drm_gem_shmem_helper.c ++++ b/drivers/gpu/drm/drm_gem_shmem_helper.c +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + + #include + #include +--- a/drivers/gpu/drm/i915/gt/intel_gtt.c ++++ b/drivers/gpu/drm/i915/gt/intel_gtt.c +@@ -6,6 +6,7 @@ + #include /* fault-inject.h is not standalone! 
*/ + + #include ++#include + + #include "gem/i915_gem_lmem.h" + #include "i915_trace.h" +--- a/drivers/gpu/drm/i915/i915_request.c ++++ b/drivers/gpu/drm/i915/i915_request.c +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + + #include "gem/i915_gem_context.h" + #include "gt/intel_breadcrumbs.h" +--- a/drivers/gpu/drm/lima/lima_device.c ++++ b/drivers/gpu/drm/lima/lima_device.c +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + #include + #include + +--- a/drivers/gpu/drm/msm/msm_gem_shrinker.c ++++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c +@@ -4,6 +4,8 @@ + * Author: Rob Clark + */ + ++#include ++ + #include "msm_drv.h" + #include "msm_gem.h" + #include "msm_gpu.h" +--- a/drivers/gpu/drm/ttm/ttm_tt.c ++++ b/drivers/gpu/drm/ttm/ttm_tt.c +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + #include + #include + +--- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c ++++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + + #include "hinic_hw_dev.h" + #include "hinic_dev.h" +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c +@@ -5,6 +5,8 @@ + * + */ + ++#include ++ + #include "otx2_common.h" + #include "otx2_ptp.h" + +--- a/drivers/pci/controller/dwc/pci-exynos.c ++++ b/drivers/pci/controller/dwc/pci-exynos.c +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + + #include "pcie-designware.h" + +--- a/drivers/usb/cdns3/host.c ++++ b/drivers/usb/cdns3/host.c +@@ -10,6 +10,7 @@ + */ + + #include ++#include + #include "core.h" + #include "drd.h" + #include "host-export.h" +--- a/include/linux/cacheinfo.h ++++ b/include/linux/cacheinfo.h +@@ -3,7 +3,6 @@ + #define _LINUX_CACHEINFO_H + + #include +-#include + #include + #include + +--- a/include/linux/device/driver.h ++++ b/include/linux/device/driver.h +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + + /** + * enum probe_type - device driver probe type to try +--- a/include/linux/filter.h ++++ b/include/linux/filter.h +@@ -8,6 +8,7 @@ + #include + + #include ++#include + #include + #include + #include +@@ -28,7 +29,6 @@ + + #include + #include +-#include + + struct sk_buff; + struct sock; +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + + #include + +--- a/mm/swap_slots.c ++++ b/mm/swap_slots.c +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + #include + #include + #include diff --git a/patches.suse/udf-use-sb_bdev_nr_blocks.patch b/patches.suse/udf-use-sb_bdev_nr_blocks.patch new file mode 100644 index 0000000..0273389 --- /dev/null +++ b/patches.suse/udf-use-sb_bdev_nr_blocks.patch @@ -0,0 +1,80 @@ +From: Christoph Hellwig +Date: Mon, 18 Oct 2021 12:11:30 +0200 +Subject: [PATCH] udf: use sb_bdev_nr_blocks +Git-commit: e4ae4735f7c2da77db57ee090b9f513bfe80d285 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +Use the sb_bdev_nr_blocks helper instead of open coding it. 
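+
+For reference, the helper is approximately the following (as defined in
+mainline's <linux/blkdev.h> around this time; quoted here for
+illustration):
+
+	static inline sector_t sb_bdev_nr_blocks(struct super_block *sb)
+	{
+		return bdev_nr_sectors(sb->s_bdev) >>
+			(sb->s_blocksize_bits - SECTOR_SHIFT);
+	}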
+ +Signed-off-by: Christoph Hellwig +Reviewed-by: Kees Cook +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20211018101130.1838532-31-hch@lst.de +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + fs/udf/lowlevel.c | 5 ++--- + fs/udf/super.c | 9 +++------ + 2 files changed, 5 insertions(+), 9 deletions(-) + +diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c +index f1094cdcd6cd..46d697172197 100644 +--- a/fs/udf/lowlevel.c ++++ b/fs/udf/lowlevel.c +@@ -47,8 +47,7 @@ unsigned int udf_get_last_session(struct super_block *sb) + + unsigned long udf_get_last_block(struct super_block *sb) + { +- struct block_device *bdev = sb->s_bdev; +- struct cdrom_device_info *cdi = disk_to_cdi(bdev->bd_disk); ++ struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk); + unsigned long lblock = 0; + + /* +@@ -56,7 +55,7 @@ unsigned long udf_get_last_block(struct super_block *sb) + * Try using the device size... + */ + if (!cdi || cdrom_get_last_written(cdi, &lblock) || lblock == 0) +- lblock = i_size_read(bdev->bd_inode) >> sb->s_blocksize_bits; ++ lblock = sb_bdev_nr_blocks(sb); + + if (lblock) + return lblock - 1; +diff --git a/fs/udf/super.c b/fs/udf/super.c +index b2d7c57d0688..34247fba6df9 100644 +--- a/fs/udf/super.c ++++ b/fs/udf/super.c +@@ -1175,8 +1175,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) + struct udf_inode_info *vati; + uint32_t pos; + struct virtualAllocationTable20 *vat20; +- sector_t blocks = i_size_read(sb->s_bdev->bd_inode) >> +- sb->s_blocksize_bits; ++ sector_t blocks = sb_bdev_nr_blocks(sb); + + udf_find_vat_block(sb, p_index, type1_index, sbi->s_last_block); + if (!sbi->s_vat_inode && +@@ -1838,8 +1837,7 @@ static int udf_check_anchor_block(struct super_block *sb, sector_t block, + int ret; + + if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) && +- udf_fixed_to_variable(block) >= +- i_size_read(sb->s_bdev->bd_inode) >> sb->s_blocksize_bits) ++ udf_fixed_to_variable(block) >= sb_bdev_nr_blocks(sb)) + return -EAGAIN; + + bh = udf_read_tagged(sb, block, block, &ident); +@@ -1901,8 +1899,7 @@ static int udf_scan_anchors(struct super_block *sb, sector_t *lastblock, + last[last_count++] = *lastblock - 152; + + for (i = 0; i < last_count; i++) { +- if (last[i] >= i_size_read(sb->s_bdev->bd_inode) >> +- sb->s_blocksize_bits) ++ if (last[i] >= sb_bdev_nr_blocks(sb)) + continue; + ret = udf_check_anchor_block(sb, last[i], fileset); + if (ret != -EAGAIN) { +-- +2.35.3 + diff --git a/patches.suse/um-drivers-ubd_kern-add-error-handling-support-for-a.patch b/patches.suse/um-drivers-ubd_kern-add-error-handling-support-for-a.patch new file mode 100644 index 0000000..95fdd35 --- /dev/null +++ b/patches.suse/um-drivers-ubd_kern-add-error-handling-support-for-a.patch @@ -0,0 +1,68 @@ +From: Luis Chamberlain +Date: Fri, 15 Oct 2021 16:30:26 -0700 +Subject: [PATCH] um/drivers/ubd_kern: add error handling support for + add_disk() +Git-commit: 66638f163a2b5c5b462ca38525129b14a20117eb +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. + +ubd_disk_register() never returned an error, so just fix +that now and let the caller handle the error condition. 
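+
+The resulting caller pattern, roughly (condensed from the hunks below):
+
+	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk);
+	if (err)
+		goto out_cleanup_disk;	/* undo the gendisk allocation */
+	...
+out_cleanup_disk:
+	blk_cleanup_disk(disk);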
+ +Reviewed-by: Gabriel Krisman Bertazi +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20211015233028.2167651-8-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + arch/um/drivers/ubd_kern.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c +index fefd343412c7..69d2d0049a61 100644 +--- a/arch/um/drivers/ubd_kern.c ++++ b/arch/um/drivers/ubd_kern.c +@@ -855,8 +855,8 @@ static const struct attribute_group *ubd_attr_groups[] = { + NULL, + }; + +-static void ubd_disk_register(int major, u64 size, int unit, +- struct gendisk *disk) ++static int ubd_disk_register(int major, u64 size, int unit, ++ struct gendisk *disk) + { + disk->major = major; + disk->first_minor = unit << UBD_SHIFT; +@@ -873,7 +873,7 @@ static void ubd_disk_register(int major, u64 size, int unit, + + disk->private_data = &ubd_devs[unit]; + disk->queue = ubd_devs[unit].queue; +- device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups); ++ return device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups); + } + + #define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE)) +@@ -920,10 +920,15 @@ static int ubd_add(int n, char **error_out) + blk_queue_write_cache(ubd_dev->queue, true, false); + blk_queue_max_segments(ubd_dev->queue, MAX_SG); + blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1); +- ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk); ++ err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk); ++ if (err) ++ goto out_cleanup_disk; ++ + ubd_gendisk[n] = disk; + return 0; + ++out_cleanup_disk: ++ blk_cleanup_disk(disk); + out_cleanup_tags: + blk_mq_free_tag_set(&ubd_dev->tag_set); + out: +-- +2.35.3 + diff --git a/patches.suse/usb-core-Unregister-device-on-component_add-failure.patch b/patches.suse/usb-core-Unregister-device-on-component_add-failure.patch new file mode 100644 index 0000000..83908bf --- /dev/null +++ b/patches.suse/usb-core-Unregister-device-on-component_add-failure.patch @@ -0,0 +1,56 @@ +From c853685d11c09da35cb49bbf8f0c001abdc0d0a9 Mon Sep 17 00:00:00 2001 +From: "Fabio M. De Francesco" +Date: Wed, 9 Feb 2022 17:45:00 +0100 +Subject: [PATCH] usb: core: Unregister device on component_add() failure +Git-commit: c853685d11c09da35cb49bbf8f0c001abdc0d0a9 +Patch-mainline: v5.17-rc4 +References: git-fixes + +Commit 8c67d06f3fd9 ("usb: Link the ports to the connectors they are +attached to") creates a link to the USB Type-C connector for every new +port that is added when possible. If component_add() fails, +usb_hub_create_port_device() prints a warning but does not unregister +the device and does not return errors to the callers. + +Syzbot reported a "WARNING in component_del()". + +Fix this issue in usb_hub_create_port_device by calling device_unregister() +and returning the errors from component_add(). + +Fixes: 8c67d06f3fd9 ("usb: Link the ports to the connectors they are attached to") +Reported-and-tested-by: syzbot+60df062e1c41940cae0f@syzkaller.appspotmail.com +Reviewed-by: Heikki Krogerus +Signed-off-by: Fabio M. 
De Francesco +Link: https://lore.kernel.org/r/20220209164500.8769-1-fmdefrancesco@gmail.com +Signed-off-by: Greg Kroah-Hartman +Acked-by: Takashi Iwai + +--- + drivers/usb/core/port.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c +index c2bbf97a79be..d5bc36ca5b1f 100644 +--- a/drivers/usb/core/port.c ++++ b/drivers/usb/core/port.c +@@ -602,11 +602,14 @@ int usb_hub_create_port_device(struct usb_hub *hub, int port1) + return retval; + } + +- find_and_link_peer(hub, port1); +- + retval = component_add(&port_dev->dev, &connector_ops); +- if (retval) ++ if (retval) { + dev_warn(&port_dev->dev, "failed to add component\n"); ++ device_unregister(&port_dev->dev); ++ return retval; ++ } ++ ++ find_and_link_peer(hub, port1); + + /* + * Enable runtime pm and hold a refernce that hub_configure() +-- +2.35.3 + diff --git a/patches.suse/usb-gadget-aspeed-Fix-probe-regression.patch b/patches.suse/usb-gadget-aspeed-Fix-probe-regression.patch new file mode 100644 index 0000000..516417c --- /dev/null +++ b/patches.suse/usb-gadget-aspeed-Fix-probe-regression.patch @@ -0,0 +1,52 @@ +From 48ed32482c4100069d0c0eebdc6b198c6ae5f71f Mon Sep 17 00:00:00 2001 +From: Joel Stanley +Date: Mon, 17 Oct 2022 16:00:06 +1030 +Subject: [PATCH] usb: gadget: aspeed: Fix probe regression +Git-commit: 48ed32482c4100069d0c0eebdc6b198c6ae5f71f +Patch-mainline: v6.1-rc3 +References: git-fixes + +Since commit fc274c1e9973 ("USB: gadget: Add a new bus for gadgets"), +the gadget devices are proper driver core devices, which caused each +device to request pinmux settings: + + aspeed_vhub 1e6a0000.usb-vhub: Initialized virtual hub in USB2 mode + aspeed-g5-pinctrl 1e6e2080.pinctrl: pin A7 already requested by 1e6a0000.usb-vhub; cannot claim for gadget.0 + aspeed-g5-pinctrl 1e6e2080.pinctrl: pin-232 (gadget.0) status -22 + aspeed-g5-pinctrl 1e6e2080.pinctrl: could not request pin 232 (A7) from group USB2AD on device aspeed-g5-pinctrl + g_mass_storage gadget.0: Error applying setting, reverse things back + +The vhub driver has already claimed the pins, so prevent the gadgets +from requesting them too by setting the magic of_node_reused flag. This +causes the driver core to skip the mux request. 
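+
+The skip happens in the driver core's pinctrl hook, along these lines
+(an approximate excerpt of drivers/base/pinctrl.c, for illustration
+only):
+
+	int pinctrl_bind_pins(struct device *dev)
+	{
+		if (dev->of_node_reused)
+			return 0;	/* the node's owner already claimed the pins */
+		...
+	}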
+
+Reported-by: Zev Weiss
+Reported-by: Jae Hyun Yoo
+Fixes: fc274c1e9973 ("USB: gadget: Add a new bus for gadgets")
+Cc: stable@vger.kernel.org
+Signed-off-by: Joel Stanley
+Tested-by: Zev Weiss
+Tested-by: Jae Hyun Yoo
+Link: https://lore.kernel.org/r/20221017053006.358520-1-joel@jms.id.au
+Signed-off-by: Greg Kroah-Hartman
+Acked-by: Takashi Iwai
+
+---
+ drivers/usb/gadget/udc/aspeed-vhub/dev.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/usb/gadget/udc/aspeed-vhub/dev.c b/drivers/usb/gadget/udc/aspeed-vhub/dev.c
+index b0dfca43fbdc..4f3bc27c1c62 100644
+--- a/drivers/usb/gadget/udc/aspeed-vhub/dev.c
++++ b/drivers/usb/gadget/udc/aspeed-vhub/dev.c
+@@ -591,6 +591,7 @@ int ast_vhub_init_dev(struct ast_vhub *vhub, unsigned int idx)
+ 	d->gadget.max_speed = USB_SPEED_HIGH;
+ 	d->gadget.speed = USB_SPEED_UNKNOWN;
+ 	d->gadget.dev.of_node = vhub->pdev->dev.of_node;
++	d->gadget.dev.of_node_reused = true;
+
+ 	rc = usb_add_gadget_udc(d->port_dev, &d->gadget);
+ 	if (rc != 0)
+--
+2.35.3
+
diff --git a/patches.suse/usb-typec-tipd-Prevent-uninitialized-event-1-2-in-IR.patch b/patches.suse/usb-typec-tipd-Prevent-uninitialized-event-1-2-in-IR.patch
new file mode 100644
index 0000000..3a3478d
--- /dev/null
+++ b/patches.suse/usb-typec-tipd-Prevent-uninitialized-event-1-2-in-IR.patch
@@ -0,0 +1,57 @@
+From 6d8fc203b28ff8f6115fbe5eaf584de8b824f4fa Mon Sep 17 00:00:00 2001
+From: Sven Peter
+Date: Wed, 2 Nov 2022 17:15:42 +0100
+Subject: [PATCH] usb: typec: tipd: Prevent uninitialized event{1,2} in IRQ handler
+Mime-version: 1.0
+Content-type: text/plain; charset=UTF-8
+Content-transfer-encoding: 8bit
+Git-commit: 6d8fc203b28ff8f6115fbe5eaf584de8b824f4fa
+Patch-mainline: v6.1-rc6
+References: git-fixes
+
+If reading TPS_REG_INT_EVENT1/2 fails in the interrupt handler, event1
+and event2 may be uninitialized when they are used to determine
+IRQ_HANDLED vs. IRQ_NONE in the error path.
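+
+A condensed sketch of the hazard (illustrative, not the verbatim driver
+code):
+
+	u64 event;	/* uninitialized */
+
+	ret = tps6598x_block_read(tps, TPS_REG_INT_EVENT1, &event, 8);
+	if (ret)
+		goto err_unlock;	/* event was never written */
+	...
+err_unlock:
+	return event ? IRQ_HANDLED : IRQ_NONE;	/* reads stack garbage */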
+ +Fixes: c7260e29dd20 ("usb: typec: tipd: Add short-circuit for no irqs") +Fixes: 45188f27b3d0 ("usb: typec: tipd: Add support for Apple CD321X") +Cc: stable +Signed-off-by: Sven Peter +Reviewed-by: Eric Curtin +Reviewed-by: Heikki Krogerus +Reviewed-by: Guido Günther +Link: https://lore.kernel.org/r/20221102161542.30669-1-sven@svenpeter.dev +Signed-off-by: Greg Kroah-Hartman +Acked-by: Takashi Iwai + +--- + drivers/usb/typec/tipd/core.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c +index b637e8b378b3..2a77bab948f5 100644 +--- a/drivers/usb/typec/tipd/core.c ++++ b/drivers/usb/typec/tipd/core.c +@@ -474,7 +474,7 @@ static void tps6598x_handle_plug_event(struct tps6598x *tps, u32 status) + static irqreturn_t cd321x_interrupt(int irq, void *data) + { + struct tps6598x *tps = data; +- u64 event; ++ u64 event = 0; + u32 status; + int ret; + +@@ -519,8 +519,8 @@ static irqreturn_t cd321x_interrupt(int irq, void *data) + static irqreturn_t tps6598x_interrupt(int irq, void *data) + { + struct tps6598x *tps = data; +- u64 event1; +- u64 event2; ++ u64 event1 = 0; ++ u64 event2 = 0; + u32 status; + int ret; + +-- +2.35.3 + diff --git a/patches.suse/usb-typec-ucsi-Only-check-the-contract-if-there-is-a.patch b/patches.suse/usb-typec-ucsi-Only-check-the-contract-if-there-is-a.patch new file mode 100644 index 0000000..148b488 --- /dev/null +++ b/patches.suse/usb-typec-ucsi-Only-check-the-contract-if-there-is-a.patch @@ -0,0 +1,49 @@ +From 3f345e907a8e7c56fdebf7231cd67afc85d02aaa Mon Sep 17 00:00:00 2001 +From: Heikki Krogerus +Date: Tue, 21 Dec 2021 17:03:52 +0300 +Subject: [PATCH] usb: typec: ucsi: Only check the contract if there is a connection +Git-commit: 3f345e907a8e7c56fdebf7231cd67afc85d02aaa +Patch-mainline: v5.16-rc8 +References: git-fixes + +The driver must make sure there is an actual connection +before checking details about the USB Power Delivery +contract. Those details are not valid unless there is a +connection. + +This fixes NULL pointer dereference that is caused by an +attempt to register bogus partner alternate mode that the +firmware on some platform may report before the actual +connection. 
+ +Link: https://bugzilla.kernel.org/show_bug.cgi?id=215117 +Fixes: 6cbe4b2d5a3f ("usb: typec: ucsi: Check the partner alt modes always if there is PD contract") +Reported-by: Chris Hixon +Signed-off-by: Heikki Krogerus +Link: https://lore.kernel.org/r/eb34f98f-00ef-3238-2daa-80481116035d@leemhuis.info/ +Link: https://lore.kernel.org/r/20211221140352.45501-1-heikki.krogerus@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Acked-by: Takashi Iwai + +--- + drivers/usb/typec/ucsi/ucsi.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c +index 6aa28384f77f..08561bf7c40c 100644 +--- a/drivers/usb/typec/ucsi/ucsi.c ++++ b/drivers/usb/typec/ucsi/ucsi.c +@@ -1150,7 +1150,9 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) + ret = 0; + } + +- if (UCSI_CONSTAT_PWR_OPMODE(con->status.flags) == UCSI_CONSTAT_PWR_OPMODE_PD) { ++ if (con->partner && ++ UCSI_CONSTAT_PWR_OPMODE(con->status.flags) == ++ UCSI_CONSTAT_PWR_OPMODE_PD) { + ucsi_get_src_pdos(con); + ucsi_check_altmodes(con); + } +-- +2.35.3 + diff --git a/patches.suse/usb-xhci_plat_remove-avoid-NULL-dereference.patch b/patches.suse/usb-xhci_plat_remove-avoid-NULL-dereference.patch new file mode 100644 index 0000000..8ba5b61 --- /dev/null +++ b/patches.suse/usb-xhci_plat_remove-avoid-NULL-dereference.patch @@ -0,0 +1,132 @@ +From d7de14d74d6551f0d097430f9893ce82ad17e5b8 Mon Sep 17 00:00:00 2001 +From: Alexey Sheplyakov +Date: Fri, 22 Jul 2022 18:17:00 +0400 +Subject: [PATCH] usb: xhci_plat_remove: avoid NULL dereference +Git-commit: d7de14d74d6551f0d097430f9893ce82ad17e5b8 +Patch-mainline: v6.0-rc1 +References: git-fixes + +Since commit 4736ebd7fcaff1eb8481c140ba494962847d6e0a ("usb: host: +Xhci-plat: omit shared hcd if either root hub has no ports") +xhci->shared_hcd can be NULL, which causes the following Oops +on reboot: + +[ 710.124450] systemd-shutdown[1]: Rebooting. 
+[ 710.298861] xhci-hcd xhci-hcd.2.auto: remove, state 4 +[ 710.304217] usb usb3: USB disconnect, device number 1 +[ 710.317441] xhci-hcd xhci-hcd.2.auto: USB bus 3 deregistered +[ 710.323280] xhci-hcd xhci-hcd.2.auto: remove, state 1 +[ 710.328401] usb usb2: USB disconnect, device number 1 +[ 710.333515] usb 2-3: USB disconnect, device number 2 +[ 710.467649] xhci-hcd xhci-hcd.2.auto: USB bus 2 deregistered +[ 710.475450] Unable to handle kernel NULL pointer dereference at virtual address 00000000000003b8 +[ 710.484425] Mem abort info: +[ 710.487265] ESR = 0x0000000096000004 +[ 710.491060] EC = 0x25: DABT (current EL), IL = 32 bits +[ 710.496427] SET = 0, FnV = 0 +[ 710.499525] EA = 0, S1PTW = 0 +[ 710.502716] FSC = 0x04: level 0 translation fault +[ 710.507648] Data abort info: +[ 710.510577] ISV = 0, ISS = 0x00000004 +[ 710.514462] CM = 0, WnR = 0 +[ 710.517480] user pgtable: 4k pages, 48-bit VAs, pgdp=00000008b0050000 +[ 710.523976] [00000000000003b8] pgd=0000000000000000, p4d=0000000000000000 +[ 710.530961] Internal error: Oops: 96000004 [#1] PREEMPT SMP +[ 710.536551] Modules linked in: rfkill input_leds snd_soc_simple_card snd_soc_simple_card_utils snd_soc_nau8822 designware_i2s snd_soc_core dw_hdmi_ahb_audio snd_pcm_dmaengine arm_ccn panfrost ac97_bus gpu_sched snd_pcm at24 fuse configfs sdhci_of_dwcmshc sdhci_pltfm sdhci nvme led_class mmc_core nvme_core bt1_pvt polynomial tp_serio snd_seq_midi snd_seq_midi_event snd_seq snd_timer snd_rawmidi snd_seq_device snd soundcore efivarfs ipv6 +[ 710.575286] CPU: 7 PID: 1 Comm: systemd-shutdow Not tainted 5.19.0-rc7-00043-gfd8619f4fd54 #1 +[ 710.583822] Hardware name: T-Platforms TF307-MB/BM1BM1-A, BIOS 5.6 07/06/2022 +[ 710.590972] pstate: 40000005 (nZcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) +[ 710.597949] pc : usb_remove_hcd+0x34/0x1e4 +[ 710.602067] lr : xhci_plat_remove+0x74/0x140 +[ 710.606351] sp : ffff800009f3b7c0 +[ 710.609674] x29: ffff800009f3b7c0 x28: ffff000800960040 x27: 0000000000000000 +[ 710.616833] x26: ffff800008dc22a0 x25: 0000000000000000 x24: 0000000000000000 +[ 710.623992] x23: 0000000000000000 x22: ffff000805465810 x21: ffff000805465800 +[ 710.631149] x20: ffff000800f80000 x19: 0000000000000000 x18: ffffffffffffffff +[ 710.638307] x17: ffff000805096000 x16: ffff00080633b800 x15: ffff000806537a1c +[ 710.645465] x14: 0000000000000001 x13: 0000000000000000 x12: ffff00080378d6f0 +[ 710.652621] x11: ffff00080041a900 x10: ffff800009b204e8 x9 : ffff8000088abaa4 +[ 710.659779] x8 : ffff000800960040 x7 : ffff800009409000 x6 : 0000000000000001 +[ 710.666936] x5 : ffff800009241000 x4 : ffff800009241440 x3 : 0000000000000000 +[ 710.674094] x2 : ffff000800960040 x1 : ffff000800960040 x0 : 0000000000000000 +[ 710.681251] Call trace: +[ 710.683704] usb_remove_hcd+0x34/0x1e4 +[ 710.687467] xhci_plat_remove+0x74/0x140 +[ 710.691400] platform_remove+0x34/0x70 +[ 710.695165] device_remove+0x54/0x90 +[ 710.698753] device_release_driver_internal+0x200/0x270 +[ 710.703992] device_release_driver+0x24/0x30 +[ 710.708273] bus_remove_device+0xe0/0x16c +[ 710.712293] device_del+0x178/0x390 +[ 710.715797] platform_device_del.part.0+0x24/0x90 +[ 710.720514] platform_device_unregister+0x30/0x50 +[ 710.725232] dwc3_host_exit+0x20/0x30 +[ 710.728907] dwc3_remove+0x174/0x1b0 +[ 710.732494] platform_remove+0x34/0x70 +[ 710.736254] device_remove+0x54/0x90 +[ 710.739840] device_release_driver_internal+0x200/0x270 +[ 710.745078] device_release_driver+0x24/0x30 +[ 710.749359] bus_remove_device+0xe0/0x16c +[ 710.753380] device_del+0x178/0x390 +[ 
710.756881] platform_device_del.part.0+0x24/0x90 +[ 710.761598] platform_device_unregister+0x30/0x50 +[ 710.766314] of_platform_device_destroy+0xe8/0x100 +[ 710.771119] device_for_each_child_reverse+0x70/0xc0 +[ 710.776099] of_platform_depopulate+0x48/0x90 +[ 710.780468] __dwc3_of_simple_teardown+0x28/0xe0 +[ 710.785099] dwc3_of_simple_shutdown+0x20/0x30 +[ 710.789555] platform_shutdown+0x30/0x40 +[ 710.793490] device_shutdown+0x138/0x32c +[ 710.797425] __do_sys_reboot+0x1c4/0x2ac +[ 710.801362] __arm64_sys_reboot+0x30/0x40 +[ 710.805383] invoke_syscall+0x50/0x120 +[ 710.809146] el0_svc_common.constprop.0+0x68/0x124 +[ 710.813950] do_el0_svc+0x3c/0xcc +[ 710.817275] el0_svc+0x60/0x12c +[ 710.820428] el0t_64_sync_handler+0xc0/0x13c +[ 710.824710] el0t_64_sync+0x18c/0x190 +[ 710.828386] Code: a9025bf5 f942c420 f9001fe0 d2800000 (b943ba62) +[ 710.834498] ---[ end trace 0000000000000000 ]--- +[ 710.875958] pstore: crypto_comp_compress failed, ret = -22! +[ 710.895047] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b +[ 710.902757] Kernel Offset: disabled +[ 710.906255] CPU features: 0x800,00004811,00001082 +[ 710.910971] Memory Limit: none +[ 710.927474] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]--- + +To avoid the problem check for NULL in usb_remove_hcd. + +Fixes: 4736ebd7fcaf ("usb: host: xhci-plat: omit shared hcd if either root hub has no ports") +Signed-off-by: Alexey Sheplyakov +Link: https://lore.kernel.org/r/20220722141700.1271439-1-asheplyakov@basealt.ru +Signed-off-by: Greg Kroah-Hartman +Acked-by: Takashi Iwai + +--- + drivers/usb/core/hcd.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c +index 06eea8848ccc..41dcd41e550c 100644 +--- a/drivers/usb/core/hcd.c ++++ b/drivers/usb/core/hcd.c +@@ -3033,9 +3033,15 @@ EXPORT_SYMBOL_GPL(usb_add_hcd); + */ + void usb_remove_hcd(struct usb_hcd *hcd) + { +- struct usb_device *rhdev = hcd->self.root_hub; ++ struct usb_device *rhdev; + bool rh_registered; + ++ if (!hcd) { ++ pr_debug("%s: hcd is NULL\n", __func__); ++ return; ++ } ++ rhdev = hcd->self.root_hub; ++ + dev_info(hcd->self.controller, "remove, state %x\n", hcd->state); + + usb_get_dev(rhdev); +-- +2.35.3 + diff --git a/patches.suse/usbnet-smsc95xx-Don-t-reset-PHY-behind-PHY-driver-s-.patch b/patches.suse/usbnet-smsc95xx-Don-t-reset-PHY-behind-PHY-driver-s-.patch new file mode 100644 index 0000000..d47d8fe --- /dev/null +++ b/patches.suse/usbnet-smsc95xx-Don-t-reset-PHY-behind-PHY-driver-s-.patch @@ -0,0 +1,65 @@ +From 14021da69811cc9bd680a83932614adf308ed0fe Mon Sep 17 00:00:00 2001 +From: Lukas Wunner +Date: Thu, 12 May 2022 10:42:03 +0200 +Subject: [PATCH] usbnet: smsc95xx: Don't reset PHY behind PHY driver's back +Git-commit: 14021da69811cc9bd680a83932614adf308ed0fe +References: git-fixes +Patch-mainline: v5.19-rc1 + +smsc95xx_reset() resets the PHY behind the PHY driver's back, which +seems like a bad idea generally. Remove that portion of the function. + +We're about to use PHY interrupts instead of polling to detect link +changes on SMSC LAN95xx chips. Because smsc95xx_reset() is called from +usbnet_open(), PHY interrupt settings are lost whenever the net_device +is brought up. + +There are two other callers of smsc95xx_reset(), namely smsc95xx_bind() +and smsc95xx_reset_resume(), and both may indeed benefit from a PHY +reset. However they already perform one through their calls to +phy_connect_direct() and phy_init_hw(). 
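+
+phy_init_hw() already gives the PHY driver a chance to reset the
+device, roughly along these lines (simplified sketch of the phylib
+flow, not the verbatim code):
+
+	int phy_init_hw(struct phy_device *phydev)
+	{
+		int ret = 0;
+
+		if (phydev->drv->soft_reset)
+			ret = phydev->drv->soft_reset(phydev);	/* e.g. genphy_soft_reset() */
+		if (ret < 0)
+			return ret;
+		if (phydev->drv->config_init)
+			ret = phydev->drv->config_init(phydev);
+		return ret;
+	}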
+ +Tested-by: Oleksij Rempel # LAN9514/9512/9500 +Tested-by: Ferry Toth # LAN9514 +Signed-off-by: Lukas Wunner +Cc: Martyn Welch +Cc: Gabriel Hojda +Reviewed-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Oliver Neukum +--- + drivers/net/usb/smsc95xx.c | 18 ------------------ + 1 file changed, 18 deletions(-) + +diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c +index 2cb44d65bbc3..6c37c7adde1b 100644 +--- a/drivers/net/usb/smsc95xx.c ++++ b/drivers/net/usb/smsc95xx.c +@@ -887,24 +887,6 @@ static int smsc95xx_reset(struct usbnet *dev) + return ret; + } + +- ret = smsc95xx_write_reg(dev, PM_CTRL, PM_CTL_PHY_RST_); +- if (ret < 0) +- return ret; +- +- timeout = 0; +- do { +- msleep(10); +- ret = smsc95xx_read_reg(dev, PM_CTRL, &read_buf); +- if (ret < 0) +- return ret; +- timeout++; +- } while ((read_buf & PM_CTL_PHY_RST_) && (timeout < 100)); +- +- if (timeout >= 100) { +- netdev_warn(dev->net, "timeout waiting for PHY Reset\n"); +- return ret; +- } +- + ret = smsc95xx_set_mac_address(dev); + if (ret < 0) + return ret; +-- +2.35.3 + diff --git a/patches.suse/x86-bpf-Cleanup-the-top-of-file-header-in-bpf_jit_co.patch b/patches.suse/x86-bpf-Cleanup-the-top-of-file-header-in-bpf_jit_co.patch new file mode 100644 index 0000000..0f470b8 --- /dev/null +++ b/patches.suse/x86-bpf-Cleanup-the-top-of-file-header-in-bpf_jit_co.patch @@ -0,0 +1,33 @@ +From: Christoph Hellwig +Date: Fri, 19 Nov 2021 17:32:11 +0100 +Subject: x86, bpf: Cleanup the top of file header in bpf_jit_comp.c +Patch-mainline: v5.17-rc1 +Git-commit: 58ffa1b413690dbfdea86c068510339fe1573c33 +References: jsc#PED-1368 + +Don't bother mentioning the file name as it is implied, and remove the +reference to internal BPF. + +Signed-off-by: Christoph Hellwig +Signed-off-by: Alexei Starovoitov +Acked-by: Song Liu +Link: https://lore.kernel.org/bpf/20211119163215.971383-2-hch@lst.de +Acked-by: Shung-Hsi Yu +--- + arch/x86/net/bpf_jit_comp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -1,9 +1,9 @@ + // SPDX-License-Identifier: GPL-2.0-only + /* +- * bpf_jit_comp.c: BPF JIT compiler ++ * BPF JIT compiler + * + * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) +- * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com ++ * Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + */ + #include + #include diff --git a/patches.suse/x86-fpu-Include-vmalloc.h-for-vzalloc.patch b/patches.suse/x86-fpu-Include-vmalloc.h-for-vzalloc.patch new file mode 100644 index 0000000..7e48448 --- /dev/null +++ b/patches.suse/x86-fpu-Include-vmalloc.h-for-vzalloc.patch @@ -0,0 +1,39 @@ +From: Stephen Rothwell +Date: Mon, 25 Oct 2021 15:04:13 +1100 +Subject: x86/fpu: Include vmalloc.h for vzalloc() +Patch-mainline: v5.16-rc1 +Git-commit: 868c250bb4639531ff33b2d879fbef39c1d9ed39 +References: git-fixes + +Explicitly include that header to avoid build errors when vzalloc() +becomes "invisible" to the compiler due to header reorganizations. + +This is not a problem in the tip tree but occurred when integrating +linux-next. + + [ bp: Commit message. 
] + +Link: https://lore.kernel.org/r/20211025151144.552c60ca@canb.auug.org.au +Fixes: 69f6ed1d14c6 ("x86/fpu: Provide infrastructure for KVM FPU cleanup") +Signed-off-by: Stephen Rothwell +Signed-off-by: Borislav Petkov +Acked-by: Shung-Hsi Yu +--- + arch/x86/kernel/fpu/core.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c +index 290836d1f2a7..8ea306b1bf8e 100644 +--- a/arch/x86/kernel/fpu/core.c ++++ b/arch/x86/kernel/fpu/core.c +@@ -16,6 +16,7 @@ + + #include + #include ++#include + + #include "context.h" + #include "internal.h" +-- +2.38.1 + diff --git a/patches.suse/x86-speculation-include-unprivileged-ebpf-status-in-spectre-v2-mitigation-reporting.patch b/patches.suse/x86-speculation-include-unprivileged-ebpf-status-in-spectre-v2-mitigation-reporting.patch index 89e23f3..637a6e8 100644 --- a/patches.suse/x86-speculation-include-unprivileged-ebpf-status-in-spectre-v2-mitigation-reporting.patch +++ b/patches.suse/x86-speculation-include-unprivileged-ebpf-status-in-spectre-v2-mitigation-reporting.patch @@ -95,10 +95,10 @@ Reviewed-by: Thomas Gleixner return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); --- a/include/linux/bpf.h +++ b/include/linux/bpf.h -@@ -1595,6 +1595,11 @@ bool bpf_prog_has_kfunc_call(const struc - const struct btf_func_model * - bpf_jit_find_kfunc_model(const struct bpf_prog *prog, - const struct bpf_insn *insn); +@@ -1784,6 +1784,11 @@ struct bpf_core_ctx { + int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, + int relo_idx, void *insn); + +static inline bool unprivileged_ebpf_enabled(void) +{ + return !sysctl_unprivileged_bpf_disabled; @@ -107,7 +107,7 @@ Reviewed-by: Thomas Gleixner #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { -@@ -1807,6 +1812,12 @@ bpf_jit_find_kfunc_model(const struct bp +@@ -2003,6 +2008,12 @@ bpf_jit_find_kfunc_model(const struct bp { return NULL; } @@ -122,7 +122,7 @@ Reviewed-by: Thomas Gleixner void __bpf_free_used_btfs(struct bpf_prog_aux *aux, --- a/kernel/sysctl.c +++ b/kernel/sysctl.c -@@ -228,6 +228,10 @@ static int bpf_stats_handler(struct ctl_ +@@ -223,6 +223,10 @@ static int bpf_stats_handler(struct ctl_ return ret; } @@ -133,7 +133,7 @@ Reviewed-by: Thomas Gleixner static int bpf_unpriv_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { -@@ -245,6 +249,9 @@ static int bpf_unpriv_handler(struct ctl +@@ -240,6 +244,9 @@ static int bpf_unpriv_handler(struct ctl return -EPERM; *(int *)table->data = unpriv_enable; } diff --git a/patches.suse/xdp-Add-xdp_do_redirect_frame-for-pre-computed-xdp_f.patch b/patches.suse/xdp-Add-xdp_do_redirect_frame-for-pre-computed-xdp_f.patch new file mode 100644 index 0000000..5f24ce6 --- /dev/null +++ b/patches.suse/xdp-Add-xdp_do_redirect_frame-for-pre-computed-xdp_f.patch @@ -0,0 +1,137 @@ +From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= +Date: Mon, 3 Jan 2022 16:08:10 +0100 +Subject: xdp: Add xdp_do_redirect_frame() for pre-computed xdp_frames +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 +Git-commit: 1372d34ccf6dd480332b2bcb2fd59a2b9a0df415 +References: jsc#PED-1368 + +Add an xdp_do_redirect_frame() variant which supports pre-computed +xdp_frame structures. This will be used in bpf_prog_run() to avoid having +to write to the xdp_frame structure when the XDP program doesn't modify the +frame boundaries. 
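+
+A hedged usage sketch (illustrative, not from this patch; the only
+assumption is a caller that already holds a pre-built xdp_frame):
+
+  static int example_redirect(struct net_device *dev, struct xdp_buff *xdp,
+                              struct xdp_frame *xdpf, struct bpf_prog *prog)
+  {
+          /* pre-computed frame: skip xdp_convert_buff_to_frame() */
+          if (xdpf)
+                  return xdp_do_redirect_frame(dev, xdp, xdpf, prog);
+          /* no frame yet: fall back to the buff-based path */
+          return xdp_do_redirect(dev, xdp, prog);
+  }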
+ +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220103150812.87914-6-toke@redhat.com +Acked-by: Shung-Hsi Yu +--- + include/linux/filter.h | 4 +++ + net/core/filter.c | 65 ++++++++++++++++++++++++++++++++++++++++--------- + 2 files changed, 58 insertions(+), 11 deletions(-) + +--- a/include/linux/filter.h ++++ b/include/linux/filter.h +@@ -1019,6 +1019,10 @@ int xdp_do_generic_redirect(struct net_d + int xdp_do_redirect(struct net_device *dev, + struct xdp_buff *xdp, + struct bpf_prog *prog); ++int xdp_do_redirect_frame(struct net_device *dev, ++ struct xdp_buff *xdp, ++ struct xdp_frame *xdpf, ++ struct bpf_prog *prog); + void xdp_do_flush(void); + + /* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -3974,26 +3974,44 @@ u32 xdp_master_redirect(struct xdp_buff + } + EXPORT_SYMBOL_GPL(xdp_master_redirect); + +-int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, +- struct bpf_prog *xdp_prog) ++static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri, ++ struct net_device *dev, ++ struct xdp_buff *xdp, ++ struct bpf_prog *xdp_prog) + { +- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + enum bpf_map_type map_type = ri->map_type; + void *fwd = ri->tgt_value; + u32 map_id = ri->map_id; +- struct xdp_frame *xdpf; +- struct bpf_map *map; + int err; + + ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->map_type = BPF_MAP_TYPE_UNSPEC; + +- if (map_type == BPF_MAP_TYPE_XSKMAP) { +- err = __xsk_map_redirect(fwd, xdp); +- goto out; +- } ++ err = __xsk_map_redirect(fwd, xdp); ++ if (unlikely(err)) ++ goto err; ++ ++ _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index); ++ return 0; ++err: ++ _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); ++ return err; ++} ++ ++static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, ++ struct net_device *dev, ++ struct xdp_frame *xdpf, ++ struct bpf_prog *xdp_prog) ++{ ++ enum bpf_map_type map_type = ri->map_type; ++ void *fwd = ri->tgt_value; ++ u32 map_id = ri->map_id; ++ struct bpf_map *map; ++ int err; ++ ++ ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ ++ ri->map_type = BPF_MAP_TYPE_UNSPEC; + +- xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) { + err = -EOVERFLOW; + goto err; +@@ -4030,7 +4048,6 @@ int xdp_do_redirect(struct net_device *d + err = -EBADRQC; + } + +-out: + if (unlikely(err)) + goto err; + +@@ -4040,8 +4057,34 @@ err: + _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); + return err; + } ++ ++int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, ++ struct bpf_prog *xdp_prog) ++{ ++ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); ++ enum bpf_map_type map_type = ri->map_type; ++ ++ if (map_type == BPF_MAP_TYPE_XSKMAP) ++ return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); ++ ++ return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp), ++ xdp_prog); ++} + EXPORT_SYMBOL_GPL(xdp_do_redirect); + ++int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, ++ struct xdp_frame *xdpf, struct bpf_prog *xdp_prog) ++{ ++ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); ++ enum bpf_map_type map_type = ri->map_type; ++ ++ if (map_type == BPF_MAP_TYPE_XSKMAP) ++ return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); ++ ++ return 
__xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog); ++} ++EXPORT_SYMBOL_GPL(xdp_do_redirect_frame); ++ + static int xdp_do_generic_redirect_map(struct net_device *dev, + struct sk_buff *skb, + struct xdp_buff *xdp, diff --git a/patches.suse/xdp-Allow-registering-memory-model-without-rxq-refer.patch b/patches.suse/xdp-Allow-registering-memory-model-without-rxq-refer.patch new file mode 100644 index 0000000..625715e --- /dev/null +++ b/patches.suse/xdp-Allow-registering-memory-model-without-rxq-refer.patch @@ -0,0 +1,204 @@ +From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= +Date: Mon, 3 Jan 2022 16:08:06 +0100 +Subject: xdp: Allow registering memory model without rxq reference +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 +Git-commit: 4a48ef70b93b8c7ed5190adfca18849e76387b80 +References: jsc#PED-1368 + +The functions that register an XDP memory model take a struct xdp_rxq as +parameter, but the RXQ is not actually used for anything other than pulling +out the struct xdp_mem_info that it embeds. So refactor the register +functions and export variants that just take a pointer to the xdp_mem_info. + +This is in preparation for enabling XDP_REDIRECT in bpf_prog_run(), using a +page_pool instance that is not connected to any network device. + +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220103150812.87914-2-toke@redhat.com +Acked-by: Shung-Hsi Yu +--- + include/net/xdp.h | 3 + + net/core/xdp.c | 92 ++++++++++++++++++++++++++++++++++++------------------ + 2 files changed, 65 insertions(+), 30 deletions(-) + +--- a/include/net/xdp.h ++++ b/include/net/xdp.h +@@ -260,6 +260,9 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_ + int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, + enum xdp_mem_type type, void *allocator); + void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq); ++int xdp_reg_mem_model(struct xdp_mem_info *mem, ++ enum xdp_mem_type type, void *allocator); ++void xdp_unreg_mem_model(struct xdp_mem_info *mem); + + /* Drivers not supporting XDP metadata can use this helper, which + * rejects any room expansion for metadata as a result. 
+--- a/net/core/xdp.c ++++ b/net/core/xdp.c +@@ -110,20 +110,15 @@ static void mem_allocator_disconnect(voi + mutex_unlock(&mem_id_lock); + } + +-void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) ++void xdp_unreg_mem_model(struct xdp_mem_info *mem) + { + struct xdp_mem_allocator *xa; +- int type = xdp_rxq->mem.type; +- int id = xdp_rxq->mem.id; ++ int type = mem->type; ++ int id = mem->id; + + /* Reset mem info to defaults */ +- xdp_rxq->mem.id = 0; +- xdp_rxq->mem.type = 0; +- +- if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { +- WARN(1, "Missing register, driver bug"); +- return; +- } ++ mem->id = 0; ++ mem->type = 0; + + if (id == 0) + return; +@@ -135,6 +130,17 @@ void xdp_rxq_info_unreg_mem_model(struct + rcu_read_unlock(); + } + } ++EXPORT_SYMBOL_GPL(xdp_unreg_mem_model); ++ ++void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) ++{ ++ if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { ++ WARN(1, "Missing register, driver bug"); ++ return; ++ } ++ ++ xdp_unreg_mem_model(&xdp_rxq->mem); ++} + EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model); + + void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) +@@ -261,28 +267,24 @@ static bool __is_supported_mem_type(enum + return true; + } + +-int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, +- enum xdp_mem_type type, void *allocator) ++static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem, ++ enum xdp_mem_type type, ++ void *allocator) + { + struct xdp_mem_allocator *xdp_alloc; + gfp_t gfp = GFP_KERNEL; + int id, errno, ret; + void *ptr; + +- if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { +- WARN(1, "Missing register, driver bug"); +- return -EFAULT; +- } +- + if (!__is_supported_mem_type(type)) +- return -EOPNOTSUPP; ++ return ERR_PTR(-EOPNOTSUPP); + +- xdp_rxq->mem.type = type; ++ mem->type = type; + + if (!allocator) { + if (type == MEM_TYPE_PAGE_POOL) +- return -EINVAL; /* Setup time check page_pool req */ +- return 0; ++ return ERR_PTR(-EINVAL); /* Setup time check page_pool req */ ++ return NULL; + } + + /* Delay init of rhashtable to save memory if feature isn't used */ +@@ -292,13 +294,13 @@ int xdp_rxq_info_reg_mem_model(struct xd + mutex_unlock(&mem_id_lock); + if (ret < 0) { + WARN_ON(1); +- return ret; ++ return ERR_PTR(ret); + } + } + + xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp); + if (!xdp_alloc) +- return -ENOMEM; ++ return ERR_PTR(-ENOMEM); + + mutex_lock(&mem_id_lock); + id = __mem_id_cyclic_get(gfp); +@@ -306,15 +308,15 @@ int xdp_rxq_info_reg_mem_model(struct xd + errno = id; + goto err; + } +- xdp_rxq->mem.id = id; +- xdp_alloc->mem = xdp_rxq->mem; ++ mem->id = id; ++ xdp_alloc->mem = *mem; + xdp_alloc->allocator = allocator; + + /* Insert allocator into ID lookup table */ + ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node); + if (IS_ERR(ptr)) { +- ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id); +- xdp_rxq->mem.id = 0; ++ ida_simple_remove(&mem_id_pool, mem->id); ++ mem->id = 0; + errno = PTR_ERR(ptr); + goto err; + } +@@ -324,13 +326,43 @@ int xdp_rxq_info_reg_mem_model(struct xd + + mutex_unlock(&mem_id_lock); + +- trace_mem_connect(xdp_alloc, xdp_rxq); +- return 0; ++ return xdp_alloc; + err: + mutex_unlock(&mem_id_lock); + kfree(xdp_alloc); +- return errno; ++ return ERR_PTR(errno); ++} ++ ++int xdp_reg_mem_model(struct xdp_mem_info *mem, ++ enum xdp_mem_type type, void *allocator) ++{ ++ struct xdp_mem_allocator *xdp_alloc; ++ ++ xdp_alloc = __xdp_reg_mem_model(mem, type, allocator); ++ if (IS_ERR(xdp_alloc)) ++ return PTR_ERR(xdp_alloc); 
++ return 0; ++} ++EXPORT_SYMBOL_GPL(xdp_reg_mem_model); ++ ++int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, ++ enum xdp_mem_type type, void *allocator) ++{ ++ struct xdp_mem_allocator *xdp_alloc; ++ ++ if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { ++ WARN(1, "Missing register, driver bug"); ++ return -EFAULT; ++ } ++ ++ xdp_alloc = __xdp_reg_mem_model(&xdp_rxq->mem, type, allocator); ++ if (IS_ERR(xdp_alloc)) ++ return PTR_ERR(xdp_alloc); ++ ++ trace_mem_connect(xdp_alloc, xdp_rxq); ++ return 0; + } ++ + EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); + + /* XDP RX runs under NAPI protection, and in different delivery error diff --git a/patches.suse/xdp-Move-conversion-to-xdp_frame-out-of-map-function.patch b/patches.suse/xdp-Move-conversion-to-xdp_frame-out-of-map-function.patch new file mode 100644 index 0000000..70672d2 --- /dev/null +++ b/patches.suse/xdp-Move-conversion-to-xdp_frame-out-of-map-function.patch @@ -0,0 +1,289 @@ +From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= +Date: Mon, 3 Jan 2022 16:08:09 +0100 +Subject: xdp: Move conversion to xdp_frame out of map functions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 +Git-commit: d53ad5d8b218a885e95080d4d3d556b16b91b1b9 +References: jsc#PED-1368 + +All map redirect functions except XSK maps convert xdp_buff to xdp_frame +before enqueueing it. So move this conversion of out the map functions +and into xdp_do_redirect(). This removes a bit of duplicated code, but more +importantly it makes it possible to support caller-allocated xdp_frame +structures, which will be added in a subsequent commit. + +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220103150812.87914-5-toke@redhat.com +Acked-by: Shung-Hsi Yu +--- + include/linux/bpf.h | 20 ++++++++++---------- + kernel/bpf/cpumap.c | 8 +------- + kernel/bpf/devmap.c | 32 +++++++++++--------------------- + net/core/filter.c | 24 +++++++++++++++++------- + 4 files changed, 39 insertions(+), 45 deletions(-) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -1670,17 +1670,17 @@ void bpf_patch_call_args(struct bpf_insn + struct btf *bpf_get_btf_vmlinux(void); + + /* Map specifics */ +-struct xdp_buff; ++struct xdp_frame; + struct sk_buff; + struct bpf_dtab_netdev; + struct bpf_cpu_map_entry; + + void __dev_flush(void); +-int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, ++int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, + struct net_device *dev_rx); +-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, ++int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, + struct net_device *dev_rx); +-int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, ++int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, + struct bpf_map *map, bool exclude_ingress); + int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, + struct bpf_prog *xdp_prog); +@@ -1689,7 +1689,7 @@ int dev_map_redirect_multi(struct net_de + bool exclude_ingress); + + void __cpu_map_flush(void); +-int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, ++int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, + struct net_device *dev_rx); + int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, + struct sk_buff *skb); +@@ -1867,26 +1867,26 @@ static inline void __dev_flush(void) + { + } + 
+-struct xdp_buff; ++struct xdp_frame; + struct bpf_dtab_netdev; + struct bpf_cpu_map_entry; + + static inline +-int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, ++int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, + struct net_device *dev_rx) + { + return 0; + } + + static inline +-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, ++int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, + struct net_device *dev_rx) + { + return 0; + } + + static inline +-int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, ++int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, + struct bpf_map *map, bool exclude_ingress) + { + return 0; +@@ -1914,7 +1914,7 @@ static inline void __cpu_map_flush(void) + } + + static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, +- struct xdp_buff *xdp, ++ struct xdp_frame *xdpf, + struct net_device *dev_rx) + { + return 0; +--- a/kernel/bpf/cpumap.c ++++ b/kernel/bpf/cpumap.c +@@ -746,15 +746,9 @@ static void bq_enqueue(struct bpf_cpu_ma + list_add(&bq->flush_node, flush_list); + } + +-int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, ++int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, + struct net_device *dev_rx) + { +- struct xdp_frame *xdpf; +- +- xdpf = xdp_convert_buff_to_frame(xdp); +- if (unlikely(!xdpf)) +- return -EOVERFLOW; +- + /* Info needed when constructing SKB on remote CPU */ + xdpf->dev_rx = dev_rx; + +--- a/kernel/bpf/devmap.c ++++ b/kernel/bpf/devmap.c +@@ -467,24 +467,19 @@ static void bq_enqueue(struct net_device + bq->q[bq->count++] = xdpf; + } + +-static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, ++static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, + struct net_device *dev_rx, + struct bpf_prog *xdp_prog) + { +- struct xdp_frame *xdpf; + int err; + + if (!dev->netdev_ops->ndo_xdp_xmit) + return -EOPNOTSUPP; + +- err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data); ++ err = xdp_ok_fwd_dev(dev, xdpf->len); + if (unlikely(err)) + return err; + +- xdpf = xdp_convert_buff_to_frame(xdp); +- if (unlikely(!xdpf)) +- return -EOVERFLOW; +- + bq_enqueue(dev, xdpf, dev_rx, xdp_prog); + return 0; + } +@@ -520,27 +515,27 @@ static u32 dev_map_bpf_prog_run_skb(stru + return act; + } + +-int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, ++int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, + struct net_device *dev_rx) + { +- return __xdp_enqueue(dev, xdp, dev_rx, NULL); ++ return __xdp_enqueue(dev, xdpf, dev_rx, NULL); + } + +-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, ++int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, + struct net_device *dev_rx) + { + struct net_device *dev = dst->dev; + +- return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog); ++ return __xdp_enqueue(dev, xdpf, dev_rx, dst->xdp_prog); + } + +-static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp) ++static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf) + { + if (!obj || + !obj->dev->netdev_ops->ndo_xdp_xmit) + return false; + +- if (xdp_ok_fwd_dev(obj->dev, xdp->data_end - xdp->data)) ++ if (xdp_ok_fwd_dev(obj->dev, xdpf->len)) + return false; + + return true; +@@ -586,14 +581,13 @@ static int get_upper_ifindexes(struct ne + return n; + } + +-int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, ++int 
dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, + struct bpf_map *map, bool exclude_ingress) + { + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct bpf_dtab_netdev *dst, *last_dst = NULL; + int excluded_devices[1+MAX_NEST_DEV]; + struct hlist_head *head; +- struct xdp_frame *xdpf; + int num_excluded = 0; + unsigned int i; + int err; +@@ -603,15 +597,11 @@ int dev_map_enqueue_multi(struct xdp_buf + excluded_devices[num_excluded++] = dev_rx->ifindex; + } + +- xdpf = xdp_convert_buff_to_frame(xdp); +- if (unlikely(!xdpf)) +- return -EOVERFLOW; +- + if (map->map_type == BPF_MAP_TYPE_DEVMAP) { + for (i = 0; i < map->max_entries; i++) { + dst = rcu_dereference_check(dtab->netdev_map[i], + rcu_read_lock_bh_held()); +- if (!is_valid_dst(dst, xdp)) ++ if (!is_valid_dst(dst, xdpf)) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) +@@ -634,7 +624,7 @@ int dev_map_enqueue_multi(struct xdp_buf + head = dev_map_index_hash(dtab, i); + hlist_for_each_entry_rcu(dst, head, index_hlist, + lockdep_is_held(&dtab->index_lock)) { +- if (!is_valid_dst(dst, xdp)) ++ if (!is_valid_dst(dst, xdpf)) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -3981,12 +3981,24 @@ int xdp_do_redirect(struct net_device *d + enum bpf_map_type map_type = ri->map_type; + void *fwd = ri->tgt_value; + u32 map_id = ri->map_id; ++ struct xdp_frame *xdpf; + struct bpf_map *map; + int err; + + ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->map_type = BPF_MAP_TYPE_UNSPEC; + ++ if (map_type == BPF_MAP_TYPE_XSKMAP) { ++ err = __xsk_map_redirect(fwd, xdp); ++ goto out; ++ } ++ ++ xdpf = xdp_convert_buff_to_frame(xdp); ++ if (unlikely(!xdpf)) { ++ err = -EOVERFLOW; ++ goto err; ++ } ++ + switch (map_type) { + case BPF_MAP_TYPE_DEVMAP: + fallthrough; +@@ -3994,17 +4006,14 @@ int xdp_do_redirect(struct net_device *d + map = READ_ONCE(ri->map); + if (unlikely(map)) { + WRITE_ONCE(ri->map, NULL); +- err = dev_map_enqueue_multi(xdp, dev, map, ++ err = dev_map_enqueue_multi(xdpf, dev, map, + ri->flags & BPF_F_EXCLUDE_INGRESS); + } else { +- err = dev_map_enqueue(fwd, xdp, dev); ++ err = dev_map_enqueue(fwd, xdpf, dev); + } + break; + case BPF_MAP_TYPE_CPUMAP: +- err = cpu_map_enqueue(fwd, xdp, dev); +- break; +- case BPF_MAP_TYPE_XSKMAP: +- err = __xsk_map_redirect(fwd, xdp); ++ err = cpu_map_enqueue(fwd, xdpf, dev); + break; + case BPF_MAP_TYPE_UNSPEC: + if (map_id == INT_MAX) { +@@ -4013,7 +4022,7 @@ int xdp_do_redirect(struct net_device *d + err = -EINVAL; + break; + } +- err = dev_xdp_enqueue(fwd, xdp, dev); ++ err = dev_xdp_enqueue(fwd, xdpf, dev); + break; + } + fallthrough; +@@ -4021,6 +4030,7 @@ int xdp_do_redirect(struct net_device *d + err = -EBADRQC; + } + ++out: + if (unlikely(err)) + goto err; + diff --git a/patches.suse/xdp-check-prog-type-before-updating-BPF-link.patch b/patches.suse/xdp-check-prog-type-before-updating-BPF-link.patch new file mode 100644 index 0000000..dcebddb --- /dev/null +++ b/patches.suse/xdp-check-prog-type-before-updating-BPF-link.patch @@ -0,0 +1,44 @@ +From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= +Date: Fri, 7 Jan 2022 23:11:13 +0100 +Subject: xdp: check prog type before updating BPF link +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Patch-mainline: v5.17-rc1 +Git-commit: 382778edc8262b7535f00523e9eb22edba1b9816 +References: jsc#PED-1368 + +The bpf_xdp_link_update() 
function didn't check the program type before +updating the program, which made it possible to install any program type as +an XDP program, which is obviously not good. Syzbot managed to trigger this +by swapping in an LWT program on the XDP hook which would crash in a helper +call. + +Fix this by adding a check and bailing out if the types don't match. + +Fixes: 026a4c28e1db ("bpf, xdp: Implement LINK_UPDATE for BPF XDP link") +Reported-by: syzbot+983941aa85af6ded1fd9@syzkaller.appspotmail.com +Acked-by: Andrii Nakryiko +Signed-off-by: Toke Høiland-Jørgensen +Link: https://lore.kernel.org/r/20220107221115.326171-1-toke@redhat.com +Signed-off-by: Alexei Starovoitov +Acked-by: Shung-Hsi Yu +--- + net/core/dev.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -8978,6 +8978,12 @@ static int bpf_xdp_link_update(struct bp + goto out_unlock; + } + old_prog = link->prog; ++ if (old_prog->type != new_prog->type || ++ old_prog->expected_attach_type != new_prog->expected_attach_type) { ++ err = -EINVAL; ++ goto out_unlock; ++ } ++ + if (old_prog == new_prog) { + /* no-op, don't disturb drivers */ + bpf_prog_put(new_prog); diff --git a/patches.suse/xen-blkfront-add-error-handling-support-for-add_disk.patch b/patches.suse/xen-blkfront-add-error-handling-support-for-add_disk.patch new file mode 100644 index 0000000..38e8315 --- /dev/null +++ b/patches.suse/xen-blkfront-add-error-handling-support-for-add_disk.patch @@ -0,0 +1,49 @@ +From: Luis Chamberlain +Date: Fri, 15 Oct 2021 16:30:24 -0700 +Subject: [PATCH] xen-blkfront: add error handling support for add_disk() +Git-commit: 293a7c528803321479593d42d0898bb5a9769db1 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on device_add_disk() as this function +returned void. Now that this is fixed, use the shiny new error +handling. The function xlvbd_alloc_gendisk() typically does the +unwinding on error on allocating the disk and creating the tag, +but since all that error handling was stuffed inside +xlvbd_alloc_gendisk() we must repeat the tag free'ing as well. + +We set the info->rq to NULL to ensure blkif_free() doesn't crash +on blk_mq_stop_hw_queues() on device_add_disk() error as the queue +will be long gone by then. 
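+
+In sketch form (identifiers as used in the hunk below; the fail label
+is the driver's existing error path), the unwind pattern now that
+device_add_disk() returns an error is:
+
+  err = device_add_disk(&info->xbdev->dev, info->gd, NULL);
+  if (err) {
+          blk_cleanup_disk(info->gd);             /* undo gendisk alloc */
+          blk_mq_free_tag_set(&info->tag_set);    /* undo tag set */
+          info->rq = NULL;    /* keep blkif_free() off the dead queue */
+          goto fail;
+  }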
+ +Reviewed-by: Juergen Gross +Signed-off-by: Luis Chamberlain +Link: https://lore.kernel.org/r/20211015233028.2167651-6-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + drivers/block/xen-blkfront.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c +index df0deb927760..8e3983e456f3 100644 +--- a/drivers/block/xen-blkfront.c ++++ b/drivers/block/xen-blkfront.c +@@ -2386,7 +2386,13 @@ static void blkfront_connect(struct blkfront_info *info) + for_each_rinfo(info, rinfo, i) + kick_pending_request_queues(rinfo); + +- device_add_disk(&info->xbdev->dev, info->gd, NULL); ++ err = device_add_disk(&info->xbdev->dev, info->gd, NULL); ++ if (err) { ++ blk_cleanup_disk(info->gd); ++ blk_mq_free_tag_set(&info->tag_set); ++ info->rq = NULL; ++ goto fail; ++ } + + info->is_ready = 1; + return; +-- +2.35.3 + diff --git a/patches.suse/xfs-convert-XLOG_FORCED_SHUTDOWN-to-xlog_is_shutdown.patch b/patches.suse/xfs-convert-XLOG_FORCED_SHUTDOWN-to-xlog_is_shutdown.patch new file mode 100644 index 0000000..df8a986 --- /dev/null +++ b/patches.suse/xfs-convert-XLOG_FORCED_SHUTDOWN-to-xlog_is_shutdown.patch @@ -0,0 +1,293 @@ +From 2039a272300b949c05888428877317b834c0b1fb Mon Sep 17 00:00:00 2001 +From: Dave Chinner +Date: Tue, 10 Aug 2021 17:59:01 -0700 +Subject: [PATCH] xfs: convert XLOG_FORCED_SHUTDOWN() to xlog_is_shutdown() +Git-commit: 2039a272300b949c05888428877317b834c0b1fb +Patch-mainline: v5.15-rc1 +References: git-fixes + +Make it less shouty and a static inline before adding more calls +through the log code. + +Also convert internal log code that uses XFS_FORCED_SHUTDOWN(mount) +to use xlog_is_shutdown(log) as well. + +Signed-off-by: Dave Chinner +Reviewed-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Darrick J. 
Wong +Acked-by: Anthony Iliopoulos + +--- + fs/xfs/xfs_log.c | 32 ++++++++++++++++---------------- + fs/xfs/xfs_log_cil.c | 10 +++++----- + fs/xfs/xfs_log_priv.h | 7 +++++-- + fs/xfs/xfs_log_recover.c | 9 +++------ + fs/xfs/xfs_trans.c | 2 +- + 5 files changed, 30 insertions(+), 30 deletions(-) + +diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c +index cc2a0ccfcc30..e2dc8acf48bc 100644 +--- a/fs/xfs/xfs_log.c ++++ b/fs/xfs/xfs_log.c +@@ -246,7 +246,7 @@ xlog_grant_head_wait( + list_add_tail(&tic->t_queue, &head->waiters); + + do { +- if (XLOG_FORCED_SHUTDOWN(log)) ++ if (xlog_is_shutdown(log)) + goto shutdown; + xlog_grant_push_ail(log, need_bytes); + +@@ -260,7 +260,7 @@ xlog_grant_head_wait( + trace_xfs_log_grant_wake(log, tic); + + spin_lock(&head->lock); +- if (XLOG_FORCED_SHUTDOWN(log)) ++ if (xlog_is_shutdown(log)) + goto shutdown; + } while (xlog_space_left(log, &head->grant) < need_bytes); + +@@ -365,7 +365,7 @@ xfs_log_writable( + return false; + if (xfs_readonly_buftarg(mp->m_log->l_targ)) + return false; +- if (XFS_FORCED_SHUTDOWN(mp)) ++ if (xlog_is_shutdown(mp->m_log)) + return false; + return true; + } +@@ -382,7 +382,7 @@ xfs_log_regrant( + int need_bytes; + int error = 0; + +- if (XLOG_FORCED_SHUTDOWN(log)) ++ if (xlog_is_shutdown(log)) + return -EIO; + + XFS_STATS_INC(mp, xs_try_logspace); +@@ -450,7 +450,7 @@ xfs_log_reserve( + + ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); + +- if (XLOG_FORCED_SHUTDOWN(log)) ++ if (xlog_is_shutdown(log)) + return -EIO; + + XFS_STATS_INC(mp, xs_try_logspace); +@@ -830,7 +830,7 @@ xlog_wait_on_iclog( + struct xlog *log = iclog->ic_log; + + trace_xlog_iclog_wait_on(iclog, _RET_IP_); +- if (!XLOG_FORCED_SHUTDOWN(log) && ++ if (!xlog_is_shutdown(log) && + iclog->ic_state != XLOG_STATE_ACTIVE && + iclog->ic_state != XLOG_STATE_DIRTY) { + XFS_STATS_INC(log->l_mp, xs_log_force_sleep); +@@ -839,7 +839,7 @@ xlog_wait_on_iclog( + spin_unlock(&log->l_icloglock); + } + +- if (XLOG_FORCED_SHUTDOWN(log)) ++ if (xlog_is_shutdown(log)) + return -EIO; + return 0; + } +@@ -940,7 +940,7 @@ xfs_log_unmount_write( + + xfs_log_force(mp, XFS_LOG_SYNC); + +- if (XLOG_FORCED_SHUTDOWN(log)) ++ if (xlog_is_shutdown(log)) + return; + + /* +@@ -1063,7 +1063,7 @@ xfs_log_space_wake( + struct xlog *log = mp->m_log; + int free_bytes; + +- if (XLOG_FORCED_SHUTDOWN(log)) ++ if (xlog_is_shutdown(log)) + return; + + if (!list_empty_careful(&log->l_write_head.waiters)) { +@@ -1154,7 +1154,7 @@ xfs_log_cover( + + ASSERT((xlog_cil_empty(mp->m_log) && xlog_iclogs_empty(mp->m_log) && + !xfs_ail_min_lsn(mp->m_log->l_ailp)) || +- XFS_FORCED_SHUTDOWN(mp)); ++ xlog_is_shutdown(mp->m_log)); + + if (!xfs_log_writable(mp)) + return 0; +@@ -1614,7 +1614,7 @@ xlog_commit_record( + }; + int error; + +- if (XLOG_FORCED_SHUTDOWN(log)) ++ if (xlog_is_shutdown(log)) + return -EIO; + + error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS); +@@ -1695,7 +1695,7 @@ xlog_grant_push_ail( + xfs_lsn_t threshold_lsn; + + threshold_lsn = xlog_grant_push_threshold(log, need_bytes); +- if (threshold_lsn == NULLCOMMITLSN || XLOG_FORCED_SHUTDOWN(log)) ++ if (threshold_lsn == NULLCOMMITLSN || xlog_is_shutdown(log)) + return; + + /* +@@ -2886,7 +2886,7 @@ xlog_state_do_callback( + cycled_icloglock = true; + + spin_lock(&log->l_icloglock); +- if (XLOG_FORCED_SHUTDOWN(log)) ++ if (xlog_is_shutdown(log)) + wake_up_all(&iclog->ic_force_wait); + else + xlog_state_clean_iclog(log, iclog); +@@ -2938,7 +2938,7 @@ xlog_state_done_syncing( + * split log writes, on the second, we shut down the 
file system and + * no iclogs should ever be attempted to be written to disk again. + */ +- if (!XLOG_FORCED_SHUTDOWN(log)) { ++ if (!xlog_is_shutdown(log)) { + ASSERT(iclog->ic_state == XLOG_STATE_SYNCING); + iclog->ic_state = XLOG_STATE_DONE_SYNC; + } +@@ -2986,7 +2986,7 @@ xlog_state_get_iclog_space( + + restart: + spin_lock(&log->l_icloglock); +- if (XLOG_FORCED_SHUTDOWN(log)) { ++ if (xlog_is_shutdown(log)) { + spin_unlock(&log->l_icloglock); + return -EIO; + } +@@ -3877,7 +3877,7 @@ xfs_log_force_umount( + * No need to get locks for this. + */ + if (logerror && log->l_iclog->ic_state == XLOG_STATE_IOERROR) { +- ASSERT(XLOG_FORCED_SHUTDOWN(log)); ++ ASSERT(xlog_is_shutdown(log)); + return 1; + } + +diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c +index 4e41130f206f..086e89334b5e 100644 +--- a/fs/xfs/xfs_log_cil.c ++++ b/fs/xfs/xfs_log_cil.c +@@ -584,7 +584,7 @@ xlog_cil_committed( + struct xfs_cil_ctx *ctx) + { + struct xfs_mount *mp = ctx->cil->xc_log->l_mp; +- bool abort = XLOG_FORCED_SHUTDOWN(ctx->cil->xc_log); ++ bool abort = xlog_is_shutdown(ctx->cil->xc_log); + + /* + * If the I/O failed, we're aborting the commit and already shutdown. +@@ -862,7 +862,7 @@ xlog_cil_push_work( + * shutdown, but then went back to sleep once already in the + * shutdown state. + */ +- if (XLOG_FORCED_SHUTDOWN(log)) { ++ if (xlog_is_shutdown(log)) { + spin_unlock(&cil->xc_push_lock); + goto out_abort_free_ticket; + } +@@ -971,7 +971,7 @@ xlog_cil_push_work( + out_abort_free_ticket: + xfs_log_ticket_ungrant(log, tic); + out_abort: +- ASSERT(XLOG_FORCED_SHUTDOWN(log)); ++ ASSERT(xlog_is_shutdown(log)); + xlog_cil_committed(ctx); + } + +@@ -1124,7 +1124,7 @@ xlog_cil_commit( + + xlog_cil_insert_items(log, tp); + +- if (regrant && !XLOG_FORCED_SHUTDOWN(log)) ++ if (regrant && !xlog_is_shutdown(log)) + xfs_log_ticket_regrant(log, tp->t_ticket); + else + xfs_log_ticket_ungrant(log, tp->t_ticket); +@@ -1197,7 +1197,7 @@ xlog_cil_force_seq( + * shutdown, but then went back to sleep once already in the + * shutdown state. + */ +- if (XLOG_FORCED_SHUTDOWN(log)) ++ if (xlog_is_shutdown(log)) + goto out_shutdown; + if (ctx->sequence > sequence) + continue; +diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h +index 6953f86f866c..e29fcb12dcb1 100644 +--- a/fs/xfs/xfs_log_priv.h ++++ b/fs/xfs/xfs_log_priv.h +@@ -464,8 +464,11 @@ struct xlog { + #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ + ((log)->l_buf_cancel_table + ((uint64_t)blkno % XLOG_BC_TABLE_SIZE)) + +-#define XLOG_FORCED_SHUTDOWN(log) \ +- (unlikely((log)->l_flags & XLOG_IO_ERROR)) ++static inline bool ++xlog_is_shutdown(struct xlog *log) ++{ ++ return (log->l_flags & XLOG_IO_ERROR); ++} + + /* common routines */ + extern int +diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c +index 1d4213302078..e6589cf4d09f 100644 +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -146,7 +146,7 @@ xlog_do_io( + + error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no, + BBTOB(nbblks), data, op); +- if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) { ++ if (error && !xlog_is_shutdown(log)) { + xfs_alert(log->l_mp, + "log recovery %s I/O error at daddr 0x%llx len %d error %d", + op == REQ_OP_WRITE ? "write" : "read", +@@ -3313,10 +3313,7 @@ xlog_do_recover( + if (error) + return error; + +- /* +- * If IO errors happened during recovery, bail out. 
+- */ +- if (XFS_FORCED_SHUTDOWN(mp)) ++ if (xlog_is_shutdown(log)) + return -EIO; + + /* +@@ -3338,7 +3335,7 @@ xlog_do_recover( + xfs_buf_hold(bp); + error = _xfs_buf_read(bp, XBF_READ); + if (error) { +- if (!XFS_FORCED_SHUTDOWN(mp)) { ++ if (!xlog_is_shutdown(log)) { + xfs_buf_ioerror_alert(bp, __this_address); + ASSERT(0); + } +diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c +index 83abaa219616..b52394b0e1f4 100644 +--- a/fs/xfs/xfs_trans.c ++++ b/fs/xfs/xfs_trans.c +@@ -905,7 +905,7 @@ __xfs_trans_commit( + */ + xfs_trans_unreserve_and_mod_dquots(tp); + if (tp->t_ticket) { +- if (regrant && !XLOG_FORCED_SHUTDOWN(mp->m_log)) ++ if (regrant && !xlog_is_shutdown(mp->m_log)) + xfs_log_ticket_regrant(mp->m_log, tp->t_ticket); + else + xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); +-- +2.35.3 + diff --git a/patches.suse/xfs-fix-perag-reference-leak-on-iteration-race-with-.patch b/patches.suse/xfs-fix-perag-reference-leak-on-iteration-race-with-.patch new file mode 100644 index 0000000..5415bcb --- /dev/null +++ b/patches.suse/xfs-fix-perag-reference-leak-on-iteration-race-with-.patch @@ -0,0 +1,90 @@ +From 892a666fafa19ab04b5e948f6c92f98f1dafb489 Mon Sep 17 00:00:00 2001 +From: Brian Foster +Date: Thu, 14 Oct 2021 12:56:10 -0700 +Subject: [PATCH] xfs: fix perag reference leak on iteration race with growfs +Git-commit: 892a666fafa19ab04b5e948f6c92f98f1dafb489 +Patch-mainline: v5.16-rc1 +References: git-fixes + +The for_each_perag*() set of macros are hacky in that some (i.e. +those based on sb_agcount) rely on the assumption that perag +iteration terminates naturally with a NULL perag at the specified +end_agno. Others allow for the final AG to have a valid perag and +require the calling function to clean up any potential leftover +xfs_perag reference on termination of the loop. + +Aside from providing a subtly inconsistent interface, the former +variant is racy with growfs because growfs can create discoverable +post-eofs perags before the final superblock update that completes +the grow operation and increases sb_agcount. This leads to the +following assert failure (reproduced by xfs/104) in the perag free +path during unmount: + + XFS: Assertion failed: atomic_read(&pag->pag_ref) == 0, file: fs/xfs/libxfs/xfs_ag.c, line: 195 + +This occurs because one of the many for_each_perag() loops in the +code that is expected to terminate with a NULL pag (and thus has no +post-loop xfs_perag_put() check) raced with a growfs and found a +non-NULL post-EOFS perag, but terminated naturally based on the +end_agno check without releasing the post-EOFS perag. + +Rework the iteration logic to lift the agno check from the main for +loop conditional to the iteration helper function. The for loop now +purely terminates on a NULL pag and xfs_perag_next() avoids taking a +reference to any perag beyond end_agno in the first place. + +Fixes: f250eedcf762 ("xfs: make for_each_perag... a first class citizen") +Signed-off-by: Brian Foster +Reviewed-by: Dave Chinner +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. 
Wong +Acked-by: Anthony Iliopoulos + +--- + fs/xfs/libxfs/xfs_ag.h | 16 ++++++---------- + 1 file changed, 6 insertions(+), 10 deletions(-) + +diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h +index 4585ebb3f450..3f597cad2c33 100644 +--- a/fs/xfs/libxfs/xfs_ag.h ++++ b/fs/xfs/libxfs/xfs_ag.h +@@ -116,30 +116,26 @@ void xfs_perag_put(struct xfs_perag *pag); + + /* + * Perag iteration APIs +- * +- * XXX: for_each_perag_range() usage really needs an iterator to clean up when +- * we terminate at end_agno because we may have taken a reference to the perag +- * beyond end_agno. Right now callers have to be careful to catch and clean that +- * up themselves. This is not necessary for the callers of for_each_perag() and +- * for_each_perag_from() because they terminate at sb_agcount where there are +- * no perag structures in tree beyond end_agno. + */ + static inline struct xfs_perag * + xfs_perag_next( + struct xfs_perag *pag, +- xfs_agnumber_t *agno) ++ xfs_agnumber_t *agno, ++ xfs_agnumber_t end_agno) + { + struct xfs_mount *mp = pag->pag_mount; + + *agno = pag->pag_agno + 1; + xfs_perag_put(pag); ++ if (*agno > end_agno) ++ return NULL; + return xfs_perag_get(mp, *agno); + } + + #define for_each_perag_range(mp, agno, end_agno, pag) \ + for ((pag) = xfs_perag_get((mp), (agno)); \ +- (pag) != NULL && (agno) <= (end_agno); \ +- (pag) = xfs_perag_next((pag), &(agno))) ++ (pag) != NULL; \ ++ (pag) = xfs_perag_next((pag), &(agno), (end_agno))) + + #define for_each_perag_from(mp, agno, pag) \ + for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag)) +-- +2.35.3 + diff --git a/patches.suse/xfs-fix-xfs_ifree-error-handling-to-not-leak-perag-r.patch b/patches.suse/xfs-fix-xfs_ifree-error-handling-to-not-leak-perag-r.patch new file mode 100644 index 0000000..34da856 --- /dev/null +++ b/patches.suse/xfs-fix-xfs_ifree-error-handling-to-not-leak-perag-r.patch @@ -0,0 +1,41 @@ +From 6f5097e3367a7c0751e165e4c15bc30511a4ba38 Mon Sep 17 00:00:00 2001 +From: Brian Foster +Date: Mon, 30 May 2022 10:56:33 +1000 +Subject: [PATCH] xfs: fix xfs_ifree() error handling to not leak perag ref +Git-commit: 6f5097e3367a7c0751e165e4c15bc30511a4ba38 +Patch-mainline: v5.19-rc1 +References: git-fixes + +For some reason commit 9a5280b312e2e ("xfs: reorder iunlink remove +operation in xfs_ifree") replaced a jump to the exit path in the +event of an xfs_difree() error with a direct return, which skips +releasing the perag reference acquired at the top of the function. +Restore the original code to drop the reference on error. + +Fixes: 9a5280b312e2e ("xfs: reorder iunlink remove operation in xfs_ifree") +Signed-off-by: Brian Foster +Reviewed-by: Darrick J. 
Wong +Reviewed-by: Dave Chinner +Signed-off-by: Dave Chinner +Acked-by: Anthony Iliopoulos + +--- + fs/xfs/xfs_inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c +index b2879870a17e..52d6f2c7d58b 100644 +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -2622,7 +2622,7 @@ xfs_ifree( + */ + error = xfs_difree(tp, pag, ip->i_ino, &xic); + if (error) +- return error; ++ goto out; + + error = xfs_iunlink_remove(tp, pag, ip); + if (error) +-- +2.35.3 + diff --git a/patches.suse/xfs-move-recovery-needed-state-updates-to-xfs_log_mo.patch b/patches.suse/xfs-move-recovery-needed-state-updates-to-xfs_log_mo.patch index 1fb5ab6..e356cea 100644 --- a/patches.suse/xfs-move-recovery-needed-state-updates-to-xfs_log_mo.patch +++ b/patches.suse/xfs-move-recovery-needed-state-updates-to-xfs_log_mo.patch @@ -76,7 +76,7 @@ index 08fef1e998ea..8edfd35317d1 100644 /* Make sure the log is dead if we're returning failure. */ - ASSERT(!error || (mp->m_log->l_flags & XLOG_IO_ERROR)); -+ ASSERT(!error || XLOG_FORCED_SHUTDOWN(log)); ++ ASSERT(!error || xlog_is_shutdown(log)); return error; } diff --git a/patches.suse/xfs-reserve-quota-for-dir-expansion-when-linking-unl.patch b/patches.suse/xfs-reserve-quota-for-dir-expansion-when-linking-unl.patch new file mode 100644 index 0000000..b23796b --- /dev/null +++ b/patches.suse/xfs-reserve-quota-for-dir-expansion-when-linking-unl.patch @@ -0,0 +1,247 @@ +From 871b9316e7a778ff97bdc34fdb2f2977f616651d Mon Sep 17 00:00:00 2001 +From: "Darrick J. Wong" +Date: Fri, 25 Feb 2022 16:18:41 -0800 +Subject: [PATCH] xfs: reserve quota for dir expansion when linking/unlinking + files +Git-commit: 871b9316e7a778ff97bdc34fdb2f2977f616651d +Patch-mainline: v5.18-rc1 +References: bsc#1205616 + +XFS does not reserve quota for directory expansion when linking or +unlinking children from a directory. This means that we don't reject +the expansion with EDQUOT when we're at or near a hard limit, which +means that unprivileged userspace can use link()/unlink() to exceed +quota. + +The fix for this is nuanced -- link operations don't always expand the +directory, and we allow a link to proceed with no space reservation if +we don't need to add a block to the directory to handle the addition. +Unlink operations generally do not expand the directory (you'd have to +free a block and then cause a btree split) and we can defer the +directory block freeing if there is no space reservation. + +Moreover, there is a further bug in that we do not trigger the blockgc +workers to try to clear space when we're out of quota. + +To fix both cases, create a new xfs_trans_alloc_dir function that +allocates the transaction, locks and joins the inodes, and reserves +quota for the directory. If there isn't sufficient space or quota, +we'll switch the caller to reservationless mode. This should prevent +quota usage overruns with the least restriction in functionality. + +Signed-off-by: Darrick J. 
Wong +Reviewed-by: Dave Chinner +Acked-by: Anthony Iliopoulos + +--- + fs/xfs/xfs_inode.c | 46 +++++++++---------------- + fs/xfs/xfs_trans.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++ + fs/xfs/xfs_trans.h | 3 ++ + 3 files changed, 106 insertions(+), 29 deletions(-) + +diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c +index 04bf467b1090..766a621b970d 100644 +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -1217,7 +1217,7 @@ xfs_link( + { + xfs_mount_t *mp = tdp->i_mount; + xfs_trans_t *tp; +- int error; ++ int error, nospace_error = 0; + int resblks; + + trace_xfs_link(tdp, target_name); +@@ -1236,19 +1236,11 @@ xfs_link( + goto std_return; + + resblks = XFS_LINK_SPACE_RES(mp, target_name->len); +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp); +- if (error == -ENOSPC) { +- resblks = 0; +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp); +- } ++ error = xfs_trans_alloc_dir(tdp, &M_RES(mp)->tr_link, sip, &resblks, ++ &tp, &nospace_error); + if (error) + goto std_return; + +- xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL); +- +- xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); +- xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); +- + error = xfs_iext_count_may_overflow(tdp, XFS_DATA_FORK, + XFS_IEXT_DIR_MANIP_CNT(mp)); + if (error) +@@ -1306,6 +1298,8 @@ xfs_link( + error_return: + xfs_trans_cancel(tp); + std_return: ++ if (error == -ENOSPC && nospace_error) ++ error = nospace_error; + return error; + } + +@@ -2755,6 +2749,7 @@ xfs_remove( + xfs_mount_t *mp = dp->i_mount; + xfs_trans_t *tp = NULL; + int is_dir = S_ISDIR(VFS_I(ip)->i_mode); ++ int dontcare; + int error = 0; + uint resblks; + +@@ -2772,31 +2767,24 @@ xfs_remove( + goto std_return; + + /* +- * We try to get the real space reservation first, +- * allowing for directory btree deletion(s) implying +- * possible bmap insert(s). If we can't get the space +- * reservation then we use 0 instead, and avoid the bmap +- * btree insert(s) in the directory code by, if the bmap +- * insert tries to happen, instead trimming the LAST +- * block from the directory. ++ * We try to get the real space reservation first, allowing for ++ * directory btree deletion(s) implying possible bmap insert(s). If we ++ * can't get the space reservation then we use 0 instead, and avoid the ++ * bmap btree insert(s) in the directory code by, if the bmap insert ++ * tries to happen, instead trimming the LAST block from the directory. ++ * ++ * Ignore EDQUOT and ENOSPC being returned via nospace_error because ++ * the directory code can handle a reservationless update and we don't ++ * want to prevent a user from trying to free space by deleting things. + */ + resblks = XFS_REMOVE_SPACE_RES(mp); +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, resblks, 0, 0, &tp); +- if (error == -ENOSPC) { +- resblks = 0; +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0, +- &tp); +- } ++ error = xfs_trans_alloc_dir(dp, &M_RES(mp)->tr_remove, ip, &resblks, ++ &tp, &dontcare); + if (error) { + ASSERT(error != -ENOSPC); + goto std_return; + } + +- xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); +- +- xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); +- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); +- + /* + * If we're removing a directory perform some additional validation. 
+ */ +diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c +index 59e2f9031b9f..3d11f9bb0dbb 100644 +--- a/fs/xfs/xfs_trans.c ++++ b/fs/xfs/xfs_trans.c +@@ -1210,3 +1210,89 @@ xfs_trans_alloc_ichange( + xfs_trans_cancel(tp); + return error; + } ++ ++/* ++ * Allocate an transaction, lock and join the directory and child inodes to it, ++ * and reserve quota for a directory update. If there isn't sufficient space, ++ * @dblocks will be set to zero for a reservationless directory update and ++ * @nospace_error will be set to a negative errno describing the space ++ * constraint we hit. ++ * ++ * The caller must ensure that the on-disk dquots attached to this inode have ++ * already been allocated and initialized. The ILOCKs will be dropped when the ++ * transaction is committed or cancelled. ++ */ ++int ++xfs_trans_alloc_dir( ++ struct xfs_inode *dp, ++ struct xfs_trans_res *resv, ++ struct xfs_inode *ip, ++ unsigned int *dblocks, ++ struct xfs_trans **tpp, ++ int *nospace_error) ++{ ++ struct xfs_trans *tp; ++ struct xfs_mount *mp = ip->i_mount; ++ unsigned int resblks; ++ bool retried = false; ++ int error; ++ ++retry: ++ *nospace_error = 0; ++ resblks = *dblocks; ++ error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp); ++ if (error == -ENOSPC) { ++ *nospace_error = error; ++ resblks = 0; ++ error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp); ++ } ++ if (error) ++ return error; ++ ++ xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); ++ ++ xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); ++ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); ++ ++ error = xfs_qm_dqattach_locked(dp, false); ++ if (error) { ++ /* Caller should have allocated the dquots! */ ++ ASSERT(error != -ENOENT); ++ goto out_cancel; ++ } ++ ++ error = xfs_qm_dqattach_locked(ip, false); ++ if (error) { ++ /* Caller should have allocated the dquots! 
*/ ++ ASSERT(error != -ENOENT); ++ goto out_cancel; ++ } ++ ++ if (resblks == 0) ++ goto done; ++ ++ error = xfs_trans_reserve_quota_nblks(tp, dp, resblks, 0, false); ++ if (error == -EDQUOT || error == -ENOSPC) { ++ if (!retried) { ++ xfs_trans_cancel(tp); ++ xfs_blockgc_free_quota(dp, 0); ++ retried = true; ++ goto retry; ++ } ++ ++ *nospace_error = error; ++ resblks = 0; ++ error = 0; ++ } ++ if (error) ++ goto out_cancel; ++ ++done: ++ *tpp = tp; ++ *dblocks = resblks; ++ return 0; ++ ++out_cancel: ++ xfs_trans_cancel(tp); ++ return error; ++} +diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h +index a487b264a9eb..faa282204498 100644 +--- a/fs/xfs/xfs_trans.h ++++ b/fs/xfs/xfs_trans.h +@@ -259,6 +259,9 @@ int xfs_trans_alloc_icreate(struct xfs_mount *mp, struct xfs_trans_res *resv, + int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp, + struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force, + struct xfs_trans **tpp); ++int xfs_trans_alloc_dir(struct xfs_inode *dp, struct xfs_trans_res *resv, ++ struct xfs_inode *ip, unsigned int *dblocks, ++ struct xfs_trans **tpp, int *nospace_error); + + static inline void + xfs_trans_set_context( +-- +2.35.3 + diff --git a/patches.suse/xhci-Fix-null-pointer-dereference-in-remove-if-xHC-h.patch b/patches.suse/xhci-Fix-null-pointer-dereference-in-remove-if-xHC-h.patch new file mode 100644 index 0000000..4d3d297 --- /dev/null +++ b/patches.suse/xhci-Fix-null-pointer-dereference-in-remove-if-xHC-h.patch @@ -0,0 +1,54 @@ +From 4a593a62a9e3a25ab4bc37f612e4edec144f7f43 Mon Sep 17 00:00:00 2001 +From: Mathias Nyman +Date: Thu, 25 Aug 2022 18:08:38 +0300 +Subject: [PATCH] xhci: Fix null pointer dereference in remove if xHC has only one roothub +Git-commit: 4a593a62a9e3a25ab4bc37f612e4edec144f7f43 +Patch-mainline: v6.0-rc4 +References: git-fixes + +The remove path in xhci platform driver tries to remove and put both main +and shared hcds even if only a main hcd exists (one roothub) + +This causes a null pointer dereference in reboot for those controllers. + +Check that the shared_hcd exists before trying to remove it. 
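+
+In sketch form (mirroring the fix below), the guarded teardown order
+for a controller that may have only one roothub is:
+
+  struct usb_hcd *shared_hcd = xhci->shared_hcd;
+
+  if (shared_hcd) {
+          usb_remove_hcd(shared_hcd);
+          xhci->shared_hcd = NULL;
+  }
+  usb_remove_hcd(hcd);
+  if (shared_hcd)
+          usb_put_hcd(shared_hcd);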
+ +Fixes: e0fe986972f5 ("usb: host: xhci-plat: prepare operation w/o shared hcd") +Reported-by: Alexey Sheplyakov +Signed-off-by: Mathias Nyman +Link: https://lore.kernel.org/r/20220825150840.132216-2-mathias.nyman@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Acked-by: Takashi Iwai + +--- + drivers/usb/host/xhci-plat.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c +index 044855818cb1..a8641b6536ee 100644 +--- a/drivers/usb/host/xhci-plat.c ++++ b/drivers/usb/host/xhci-plat.c +@@ -398,12 +398,17 @@ static int xhci_plat_remove(struct platform_device *dev) + pm_runtime_get_sync(&dev->dev); + xhci->xhc_state |= XHCI_STATE_REMOVING; + +- usb_remove_hcd(shared_hcd); +- xhci->shared_hcd = NULL; ++ if (shared_hcd) { ++ usb_remove_hcd(shared_hcd); ++ xhci->shared_hcd = NULL; ++ } ++ + usb_phy_shutdown(hcd->usb_phy); + + usb_remove_hcd(hcd); +- usb_put_hcd(shared_hcd); ++ ++ if (shared_hcd) ++ usb_put_hcd(shared_hcd); + + clk_disable_unprepare(clk); + clk_disable_unprepare(reg_clk); +-- +2.35.3 + diff --git a/patches.suse/xhci-Fix-null-pointer-dereference-in-resume-if-xhci-.patch b/patches.suse/xhci-Fix-null-pointer-dereference-in-resume-if-xhci-.patch new file mode 100644 index 0000000..7d593ca --- /dev/null +++ b/patches.suse/xhci-Fix-null-pointer-dereference-in-resume-if-xhci-.patch @@ -0,0 +1,71 @@ +From 802dcafc420af536fcde1b44ac51ca211f4ec673 Mon Sep 17 00:00:00 2001 +From: Mathias Nyman +Date: Fri, 10 Jun 2022 14:53:38 +0300 +Subject: [PATCH] xhci: Fix null pointer dereference in resume if xhci has only one roothub +Git-commit: 802dcafc420af536fcde1b44ac51ca211f4ec673 +Patch-mainline: v5.19-rc3 +References: git-fixes + +In the re-init path xhci_resume() passes 'hcd->primary_hcd' to hci_init(), +however this field isn't initialized by __usb_create_hcd() for a HCD +without secondary controller. + +xhci_resume() is called once per xHC device, not per hcd, so the extra +checking for primary hcd can be removed. + +Fixes: e0fe986972f5 ("usb: host: xhci-plat: prepare operation w/o shared hcd") +Reported-by: Matthias Kaehlcke +Tested-by: Matthias Kaehlcke +Signed-off-by: Mathias Nyman +Link: https://lore.kernel.org/r/20220610115338.863152-2-mathias.nyman@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Acked-by: Takashi Iwai + +--- + drivers/usb/host/xhci.c | 15 +++++---------- + 1 file changed, 5 insertions(+), 10 deletions(-) + +diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c +index f0ab63138016..9ac56e9ffc64 100644 +--- a/drivers/usb/host/xhci.c ++++ b/drivers/usb/host/xhci.c +@@ -1107,7 +1107,6 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) + { + u32 command, temp = 0; + struct usb_hcd *hcd = xhci_to_hcd(xhci); +- struct usb_hcd *secondary_hcd; + int retval = 0; + bool comp_timer_running = false; + bool pending_portevent = false; +@@ -1214,23 +1213,19 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) + * first with the primary HCD, and then with the secondary HCD. + * If we don't do the same, the host will never be started. 
+ */ +- if (!usb_hcd_is_primary_hcd(hcd)) +- secondary_hcd = hcd; +- else +- secondary_hcd = xhci->shared_hcd; +- + xhci_dbg(xhci, "Initialize the xhci_hcd\n"); +- retval = xhci_init(hcd->primary_hcd); ++ retval = xhci_init(hcd); + if (retval) + return retval; + comp_timer_running = true; + + xhci_dbg(xhci, "Start the primary HCD\n"); +- retval = xhci_run(hcd->primary_hcd); +- if (!retval && secondary_hcd) { ++ retval = xhci_run(hcd); ++ if (!retval && xhci->shared_hcd) { + xhci_dbg(xhci, "Start the secondary HCD\n"); +- retval = xhci_run(secondary_hcd); ++ retval = xhci_run(xhci->shared_hcd); + } ++ + hcd->state = HC_STATE_SUSPENDED; + if (xhci->shared_hcd) + xhci->shared_hcd->state = HC_STATE_SUSPENDED; +-- +2.35.3 + diff --git a/patches.suse/xtensa-platforms-iss-simdisk-add-error-handling-supp.patch b/patches.suse/xtensa-platforms-iss-simdisk-add-error-handling-supp.patch new file mode 100644 index 0000000..0036c69 --- /dev/null +++ b/patches.suse/xtensa-platforms-iss-simdisk-add-error-handling-supp.patch @@ -0,0 +1,65 @@ +From: Luis Chamberlain +Date: Mon, 27 Sep 2021 15:01:01 -0700 +Subject: [PATCH] xtensa/platforms/iss/simdisk: add error handling support for + add_disk() +Git-commit: db8eda9c43361023678aa23eb0dceb0a411af0f3 +Patch-mainline: v5.16-rc1 +References: jsc#PED-1183 + +We never checked for errors on add_disk() as this function +returned void. Now that this is fixed, use the shiny new +error handling. + +Signed-off-by: Luis Chamberlain +Acked-by: Max Filippov +Link: https://lore.kernel.org/r/20210927220110.1066271-7-mcgrof@kernel.org +Signed-off-by: Jens Axboe +Acked-by: Hannes Reinecke +--- + arch/xtensa/platforms/iss/simdisk.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c +index ddd1fe3db474..07b642c1916a 100644 +--- a/arch/xtensa/platforms/iss/simdisk.c ++++ b/arch/xtensa/platforms/iss/simdisk.c +@@ -258,6 +258,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which, + struct proc_dir_entry *procdir) + { + char tmp[2] = { '0' + which, 0 }; ++ int err = -ENOMEM; + + dev->fd = -1; + dev->filename = NULL; +@@ -266,7 +267,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which, + + dev->gd = blk_alloc_disk(NUMA_NO_NODE); + if (!dev->gd) +- return -ENOMEM; ++ goto out; + dev->gd->major = simdisk_major; + dev->gd->first_minor = which; + dev->gd->minors = SIMDISK_MINORS; +@@ -274,10 +275,18 @@ static int __init simdisk_setup(struct simdisk *dev, int which, + dev->gd->private_data = dev; + snprintf(dev->gd->disk_name, 32, "simdisk%d", which); + set_capacity(dev->gd, 0); +- add_disk(dev->gd); ++ err = add_disk(dev->gd); ++ if (err) ++ goto out_cleanup_disk; + + dev->procfile = proc_create_data(tmp, 0644, procdir, &simdisk_proc_ops, dev); ++ + return 0; ++ ++out_cleanup_disk: ++ blk_cleanup_disk(dev->gd); ++out: ++ return err; + } + + static int __init simdisk_init(void) +-- +2.35.3 + diff --git a/series.conf b/series.conf index 8cfe53d..0cca975 100644 --- a/series.conf +++ b/series.conf @@ -169,6 +169,9 @@ patches.suse/s390-pci-cleanup-resources-only-if-necessary.patch patches.suse/s390-kasan-fix-large-PMD-pages-address-alignment-che.patch patches.suse/s390-pci-fix-misleading-rc-in-clp_set_pci_fn.patch + patches.suse/s390-pci-handle-FH-state-mismatch-only-on-disable + patches.suse/s390-pci-simplify-CLP-List-PCI-handling + patches.suse/s390-pci-improve-DMA-translation-init-and-exit patches.suse/s390-debug-keep-debug-data-on-resize.patch 
patches.suse/s390-debug-fix-debug-area-life-cycle.patch patches.suse/s390-ap-fix-state-machine-hang-after-failure-to-enab.patch @@ -676,7 +679,32 @@ patches.suse/nfsd-fix-crash-on-LOCKT-on-reexported-NFSv3.patch patches.suse/iomap-simplify-iomap_readpage_actor.patch patches.suse/iomap-simplify-iomap_add_to_ioend.patch + patches.suse/iomap-support-reading-inline-data-from-non-zero-pos.patch + patches.suse/iomap-Support-inline-data-with-block-size-page-size.patch + patches.suse/iomap-Fix-some-typos-and-bad-grammar.patch patches.suse/iomap-pass-writeback-errors-to-the-mapping.patch + patches.suse/iomap-fix-a-trivial-comment-typo-in-trace.h.patch + patches.suse/iomap-remove-the-iomap-arguments-to-page_-prepare-do.patch + patches.suse/iomap-mark-the-iomap-argument-to-iomap_sector-const.patch + patches.suse/iomap-mark-the-iomap-argument-to-iomap_inline_data-c.patch + patches.suse/iomap-mark-the-iomap-argument-to-iomap_inline_data_v.patch + patches.suse/fs-mark-the-iomap-argument-to-__block_write_begin_in.patch + patches.suse/fsdax-mark-the-iomap-argument-to-dax_iomap_sector-as.patch + patches.suse/iomap-mark-the-iomap-argument-to-iomap_read_inline_d.patch + patches.suse/iomap-mark-the-iomap-argument-to-iomap_read_page_syn.patch + patches.suse/iomap-fix-the-iomap_readpage_actor-return-value-for-.patch + patches.suse/iomap-add-the-new-iomap_iter-model.patch + patches.suse/iomap-switch-readahead-and-readpage-to-use-iomap_ite.patch + patches.suse/iomap-switch-iomap_file_buffered_write-to-use-iomap_.patch + patches.suse/iomap-switch-iomap_file_unshare-to-use-iomap_iter.patch + patches.suse/iomap-switch-iomap_zero_range-to-use-iomap_iter.patch + patches.suse/iomap-switch-iomap_page_mkwrite-to-use-iomap_iter.patch + patches.suse/iomap-switch-__iomap_dio_rw-to-use-iomap_iter.patch + patches.suse/iomap-switch-iomap_fiemap-to-use-iomap_iter.patch + patches.suse/iomap-switch-iomap_bmap-to-use-iomap_iter.patch + patches.suse/iomap-switch-iomap_seek_hole-to-use-iomap_iter.patch + patches.suse/iomap-switch-iomap_seek_data-to-use-iomap_iter.patch + patches.suse/iomap-switch-iomap_swapfile_activate-to-use-iomap_it.patch patches.suse/mm-swap-consider-max-pages-in-iomap_swapfile_add_ext.patch patches.suse/audit-move-put_tree-to-avoid-trim_trees-refcount-und.patch patches.suse/Smack-Fix-wrong-semantics-in-smk_access_entry.patch @@ -723,6 +751,7 @@ patches.suse/mmc-core-Avoid-hogging-the-CPU-while-polling-for-bus-468108155b0f.patch patches.suse/mmc-core-Avoid-hogging-the-CPU-while-polling-for-bus-6966e6094c6d.patch patches.suse/mmc-sdhci-esdhc-imx-Remove-unneeded-mmc-esdhc-imx.h-.patch + patches.suse/mmc-core-Store-pointer-to-bio_crypt_ctx-in-mmc_reque.patch patches.suse/mmc-rtsx_pci-Fix-long-reads-when-clock-is-prescaled.patch patches.suse/mmc-sdhci-Fix-issue-with-uninitialized-dma_slave_con.patch patches.suse/mmc-dw_mmc-Fix-issue-with-uninitialized-dma_slave_co.patch @@ -1572,6 +1601,7 @@ patches.suse/selftests-bpf-Replace-CHECK-with-ASSERT_-macros-in-s.patch patches.suse/selftests-bpf-Fix-flaky-send_signal-test.patch patches.suse/bpf-Remove-redundant-initialization-of-variable-allo.patch + patches.suse/libbpf-Rename-libbpf-documentation-index-file.patch patches.suse/bpf-Allow-bpf_get_netns_cookie-in-BPF_PROG_TYPE_SOCK.patch patches.suse/selftests-bpf-Test-for-get_netns_cookie.patch patches.suse/bpf-Undo-off-by-one-in-interpreter-tail-call-count-l.patch @@ -2758,6 +2788,7 @@ patches.suse/docs-networking-dpaa2-fix-chapter-title-format.patch patches.suse/mm-Add-kvrealloc.patch 
patches.suse/xfs-allow-setting-and-clearing-of-log-incompat-featu.patch + patches.suse/xfs-convert-XLOG_FORCED_SHUTDOWN-to-xlog_is_shutdown.patch patches.suse/xfs-move-recovery-needed-state-updates-to-xfs_log_mo.patch patches.suse/xfs-make-xfs_rtalloc_query_range-input-parameters-co.patch patches.suse/xfs-fix-I_DONTCACHE.patch @@ -3397,6 +3428,7 @@ patches.suse/powerpc-config-Renable-MTD_PHYSMAP_OF.patch patches.suse/powerpc-kvm-Remove-obsolete-and-unneeded-select.patch patches.suse/powerpc-perf-hv-gpci-Fix-counter-value-parsing.patch + patches.suse/powerpc-pseries-vas-Declare-pseries_vas_fault_thread.patch patches.suse/powerpc-perf-Use-stack-siar-instead-of-mfspr.patch patches.suse/powerpc-perf-Drop-the-case-of-returning-0-as-instruc.patch patches.suse/powerpc-perf-Fix-the-check-for-SIAR-value.patch @@ -3857,6 +3889,8 @@ patches.suse/s390-entry-make-oklabel-within-CHKSTG-macro-local.patch patches.suse/s390-unwind-use-current_frame_address-to-unwind-current-task.patch patches.suse/s390-topology-fix-topology-information-when-calling-.patch + patches.suse/s390-pci-fix-clp_get_state-handling-of-ENODEV + patches.suse/s390-remove-xpram-device-driver.patch patches.suse/scsi-zfcp-fix-kernel-doc-comments patches.suse/tracing-Add-migrate-disabled-counter-to-tracing-outp.patch patches.suse/tracing-osnoise-Fix-missed-cpus_read_unlock-in-start_per_cpu_kthreads.patch @@ -4972,6 +5006,7 @@ patches.suse/virtio-write-back-F_VERSION_1-before-validate.patch patches.suse/vhost_vdpa-unset-vq-irq-before-freeing-irq.patch patches.suse/Revert-virtio-blk-Add-validation-for-block-size-in-c.patch + patches.suse/VDUSE-fix-documentation-underline-warning.patch patches.suse/vhost-vdpa-Fix-the-wrong-input-in-config_cb.patch patches.suse/block-rnbd-clt-sysfs-fix-a-couple-uninitialized-vari.patch patches.suse/nvme-pci-Fix-abort-command-id.patch @@ -4999,6 +5034,7 @@ patches.suse/vfs-check-fd-has-read-access-in-kernel_read_file_fro.patch patches.suse/mm-secretmem-fix-NULL-page-mapping-dereference-in-pa.patch patches.suse/mm-thp-decrease-nr_thps-in-file-s-mapping-on-THP-spl.patch + patches.suse/KVM-s390-Function-documentation-fixes patches.suse/KVM-SEV-ES-fix-length-of-string-I-O.patch patches.suse/KVM-arm64-Fix-host-stage-2-PGD-refcount.patch patches.suse/KVM-arm64-Report-corrupted-refcount-at-EL2.patch @@ -5121,6 +5157,7 @@ patches.suse/ACPI-tools-fix-compilation-error.patch patches.suse/hyperv-vmbus-include-linux-bitops.h.patch patches.suse/blk-cgroup-blk_cgroup_bio_start-should-use-irq-safe-.patch + patches.suse/block-fix-incorrect-references-to-disk-objects.patch patches.suse/scsi-core-Fix-shost-cmd_per_lun-calculation-in-scsi_.patch patches.suse/scsi-iscsi-Fix-set_param-handling.patch patches.suse/scsi-storvsc-Fix-validation-for-unsolicited-incoming.patch @@ -5280,6 +5317,7 @@ patches.suse/mmc-cqhci-clear-HALT-state-after-CQE-enable.patch patches.suse/mmc-mediatek-Move-cqhci-init-behind-ungate-clock.patch patches.suse/mmc-tmio-reenable-card-irqs-after-the-reset-callback.patch + patches.suse/block-drain-queue-after-disk-is-removed-from-sysfs.patch patches.suse/block-schedule-queue-restart-after-BLK_STS_ZONE_RESO.patch patches.suse/block-Fix-partition-check-for-host-aware-zoned-block.patch patches.suse/nvmet-tcp-fix-a-memory-leak-when-releasing-a-queue.patch @@ -5366,7 +5404,65 @@ patches.suse/blk-mq-optimise-end_request-non-stat-path.patch patches.suse/sbitmap-add-__sbitmap_queue_get_batch.patch patches.suse/block-improve-batched-tag-allocation.patch + 
patches.suse/block-remove-redundant-y-from-BLK_CGROUP-dependency.patch + patches.suse/block-simplify-Kconfig-files.patch + patches.suse/block-move-menu-Partition-type-to-block-partitions-K.patch + patches.suse/block-move-CONFIG_BLOCK-guard-to-top-Makefile.patch + patches.suse/block-only-check-previous-entry-for-plug-merge-attem.patch + patches.suse/direct-io-remove-blk_poll-support.patch + patches.suse/block-don-t-try-to-poll-multi-bio-I-Os-in-__blkdev_d.patch + patches.suse/iomap-don-t-try-to-poll-multi-bio-I-Os-in-__iomap_di.patch + patches.suse/io_uring-fix-a-layering-violation-in-io_iopoll_req_i.patch + patches.suse/blk-mq-factor-out-a-blk_qc_to_hctx-helper.patch + patches.suse/blk-mq-factor-out-a-classic-poll-helper.patch + patches.suse/blk-mq-remove-blk_qc_t_to_tag-and-blk_qc_t_is_intern.patch + patches.suse/blk-mq-remove-blk_qc_t_valid.patch + patches.suse/block-replace-the-spin-argument-to-blk_iopoll-with-a.patch + patches.suse/io_uring-don-t-sleep-when-polling-for-I-O.patch + patches.suse/block-rename-REQ_HIPRI-to-REQ_POLLED.patch + patches.suse/block-use-SLAB_TYPESAFE_BY_RCU-for-the-bio-slab.patch + patches.suse/block-define-struct-bvec_iter-as-packed.patch + patches.suse/block-switch-polling-to-be-bio-based.patch + patches.suse/block-don-t-allow-writing-to-the-poll-queue-attribut.patch + patches.suse/nvme-multipath-enable-polled-I-O.patch + patches.suse/block-cache-bdev-in-struct-file-for-raw-bdev-IO.patch + patches.suse/block-use-flags-instead-of-bit-fields-for-blkdev_dio.patch + patches.suse/block-handle-fast-path-of-bio-splitting-inline.patch + patches.suse/block-cache-request-queue-in-bdev.patch + patches.suse/block-use-bdev_get_queue-in-bdev.c.patch + patches.suse/block-use-bdev_get_queue-in-bio.c.patch + patches.suse/block-use-bdev_get_queue-in-blk-core.c.patch + patches.suse/block-convert-the-rest-of-block-to-bdev_get_queue.patch + patches.suse/block-don-t-bother-iter-advancing-a-fully-done-bio.patch + patches.suse/block-remove-useless-caller-argument-to-print_req_er.patch + patches.suse/block-move-update-request-helpers-into-blk-mq.c.patch + patches.suse/block-improve-layout-of-struct-request.patch patches.suse/block-only-mark-bio-as-tracked-if-it-really-is-track.patch + patches.suse/block-store-elevator-state-in-request.patch + patches.suse/block-skip-elevator-fields-init-for-non-elv-queue.patch + patches.suse/block-blk_mq_rq_ctx_init-cache-ctx-q-hctx.patch + patches.suse/block-cache-rq_flags-inside-blk_mq_rq_ctx_init.patch + patches.suse/block-remove-debugfs-blk_mq_ctx-dispatched-merged-co.patch + patches.suse/block-remove-some-blk_mq_hw_ctx-debugfs-entries.patch + patches.suse/block-provide-helpers-for-rq_list-manipulation.patch + patches.suse/block-add-a-struct-io_comp_batch-argument-to-fops-io.patch + patches.suse/sbitmap-add-helper-to-clear-a-batch-of-tags.patch + patches.suse/block-add-support-for-blk_mq_end_request_batch.patch + patches.suse/nvme-add-support-for-batched-completion-of-polled-IO.patch + patches.suse/io_uring-utilize-the-io-batching-infrastructure-for-.patch + patches.suse/nvme-wire-up-completion-batching-for-the-IRQ-path.patch + patches.suse/block-fix-too-broad-elevator-check-in-blk_mq_free_re.patch + patches.suse/block-move-bdev_read_only-into-the-header.patch + patches.suse/block-don-t-call-blk_status_to_errno-in-blk_update_r.patch + patches.suse/block-return-whether-or-not-to-unplug-through-boolea.patch + patches.suse/block-get-rid-of-plug-list-sorting.patch + patches.suse/block-move-blk_mq_tag_to_rq-inline.patch + 
patches.suse/block-align-blkdev_dio-inlined-bio-to-a-cacheline.patch + patches.suse/blk-wbt-prevent-NULL-pointer-dereference-in-wb_timer.patch + patches.suse/block-change-plugging-to-use-a-singly-linked-list.patch + patches.suse/block-attempt-direct-issue-of-plug-list.patch + patches.suse/blk-mq-don-t-handle-non-flush-requests-in-blk_insert.patch + patches.suse/block-inline-fast-path-of-driver-tag-allocation.patch patches.suse/block-bfq-fix-UAF-problem-in-bfqg_stats_init.patch patches.suse/nvme-add-APIs-for-stopping-starting-admin-queue.patch patches.suse/nvme-apply-nvme-API-to-quiesce-unquiesce-admin-queue.patch @@ -5374,18 +5470,98 @@ patches.suse/nvme-paring-quiesce-unquiesce.patch patches.suse/nvme-loop-clear-NVME_CTRL_ADMIN_Q_STOPPED-after-admi.patch patches.suse/blk-mq-support-concurrent-queue-quiesce-unquiesce.patch + patches.suse/block-turn-macro-helpers-into-inline-functions.patch + patches.suse/block-convert-leftovers-to-bdev_get_queue.patch + patches.suse/block-optimise-req_bio_endio.patch + patches.suse/block-don-t-bloat-enter_queue-with-percpu_ref.patch + patches.suse/block-inline-a-part-of-bio_release_pages.patch patches.suse/block-remove-inaccurate-requeue-check.patch + patches.suse/blk-mq-only-flush-requests-from-the-plug-in-blk_mq_s.patch + patches.suse/blk-mq-move-blk_mq_flush_plug_list-to-block-blk-mq.h.patch + patches.suse/block-optimise-blk_flush_plug_list.patch + patches.suse/block-cleanup-the-flush-plug-helpers.patch + patches.suse/blk-mq-Fix-blk_mq_tagset_busy_iter-for-shared-tags.patch + patches.suse/block-optimise-boundary-blkdev_read_iter-s-checks.patch + patches.suse/block-clean-up-blk_mq_submit_bio-merging.patch + patches.suse/block-convert-fops.c-magic-constants-to-SHIFT_SECTOR.patch + patches.suse/percpu_ref-percpu_ref_tryget_live-version-holding-RC.patch + patches.suse/block-kill-extra-rcu-lock-unlock-in-queue-enter.patch + patches.suse/block-Add-invalidate_disk-helper-to-invalidate-the-g.patch + patches.suse/loop-Use-invalidate_disk-helper-to-invalidate-gendis.patch + patches.suse/loop-Remove-the-unnecessary-bdev-checks-and-unused-b.patch + patches.suse/blk-crypto-fallback-properly-prefix-function-and-str.patch + patches.suse/blk-crypto-rename-keyslot-manager-files-to-blk-crypt.patch + patches.suse/blk-crypto-rename-blk_keyslot_manager-to-blk_crypto_.patch + patches.suse/blk-crypto-update-inline-encryption-documentation.patch + patches.suse/block-fix-req_bio_endio-append-error-handling.patch + patches.suse/blk-mq-sched-Don-t-reference-queue-tagset-in-blk_mq_.patch + patches.suse/sched-make-task_struct-plug-always-defined.patch + patches.suse/block-add-single-bio-async-direct-IO-helper.patch + patches.suse/block-refactor-bio_iov_bvec_set.patch patches.suse/blk-cgroup-synchronize-blkg-creation-against-policy-.patch + patches.suse/sbitmap-silence-data-race-warning.patch + patches.suse/blk-mq-don-t-issue-request-directly-in-case-that-cur.patch + patches.suse/block-Add-independent-access-ranges-support.patch + patches.suse/block-avoid-extra-iter-advance-with-async-iocb.patch + patches.suse/block-kill-unused-polling-bits-in-__blkdev_direct_IO.patch + patches.suse/block-kill-DIO_MULTI_BIO.patch + patches.suse/block-add-async-version-of-bio_set_polled.patch + patches.suse/block-add-rq_flags-to-struct-blk_mq_alloc_data.patch + patches.suse/block-pass-in-blk_mq_tags-to-blk_mq_rq_ctx_init.patch + patches.suse/block-prefetch-request-to-be-initialized.patch + patches.suse/block-re-flow-blk_mq_rq_ctx_init.patch 
patches.suse/block-Add-a-helper-to-validate-the-block-size.patch patches.suse/loop-Use-blk_validate_block_size-to-validate-block-s.patch patches.suse/0002-virtio-blk-Use-blk_validate_block_size-to-validate-b.patch + patches.suse/block-improve-readability-of-blk_mq_end_request_batc.patch + patches.suse/blk-mq-debugfs-Show-active-requests-per-queue-for-sh.patch + patches.suse/null_blk-poll-queue-support.patch + patches.suse/loop-add-error-handling-support-for-add_disk.patch + patches.suse/aoe-add-error-handling-support-for-add_disk.patch + patches.suse/n64cart-add-error-handling-support-for-add_disk.patch + patches.suse/pcd-move-the-identify-buffer-into-pcd_identify.patch + patches.suse/pcd-cleanup-initialization.patch + patches.suse/pf-cleanup-initialization.patch + patches.suse/pd-cleanup-initialization.patch + patches.suse/pcd-add-error-handling-support-for-add_disk.patch + patches.suse/pcd-fix-ordering-of-unregister_cdrom.patch + patches.suse/pcd-capture-errors-on-cdrom_register.patch + patches.suse/pd-add-error-handling-support-for-add_disk.patch + patches.suse/mtip32xx-add-error-handling-support-for-add_disk.patch + patches.suse/pktcdvd-add-error-handling-support-for-add_disk.patch + patches.suse/block-rsxx-add-error-handling-support-for-add_disk.patch + patches.suse/block-sx8-add-error-handling-support-for-add_disk.patch + patches.suse/pf-add-error-handling-support-for-add_disk.patch + patches.suse/cdrom-gdrom-add-error-handling-support-for-add_disk.patch + patches.suse/rbd-add-add_disk-error-handling.patch + patches.suse/block-swim3-add-error-handling-support-for-add_disk.patch + patches.suse/floppy-fix-add_disk-assumption-on-exit-due-to-new-de.patch + patches.suse/floppy-use-blk_cleanup_disk.patch patches.suse/floppy-fix-calling-platform_device_unregister-on-inv.patch + patches.suse/floppy-add-error-handling-support-for-add_disk.patch + patches.suse/amiflop-add-error-handling-support-for-add_disk.patch + patches.suse/swim-simplify-using-blk_cleanup_disk-on-swim_remove.patch + patches.suse/swim-add-helper-for-disk-cleanup.patch + patches.suse/swim-add-a-floppy-registration-bool-which-triggers-d.patch + patches.suse/swim-add-error-handling-support-for-add_disk.patch patches.suse/block-ataflop-use-the-blk_cleanup_disk-helper.patch patches.suse/block-ataflop-add-registration-bool-before-calling-d.patch patches.suse/block-ataflop-provide-a-helper-for-cleanup-up-an-ata.patch + patches.suse/block-ataflop-add-error-handling-support-for-add_dis.patch + patches.suse/xtensa-platforms-iss-simdisk-add-error-handling-supp.patch + patches.suse/pcd-fix-error-codes-in-pcd_init_unit.patch + patches.suse/pf-fix-error-codes-in-pf_init_unit.patch + patches.suse/sx8-fix-an-error-code-in-carm_init_one.patch + patches.suse/swim3-add-missing-major.h-include.patch + patches.suse/md-add-error-handling-support-for-add_disk.patch + patches.suse/md-add-the-bitmap-group-to-the-default-groups-for-th.patch + patches.suse/md-extend-disks_mutex-coverage.patch + patches.suse/md-properly-unwind-when-failing-to-add-the-kobject-i.patch patches.suse/md-raid1-only-allocate-write-behind-bio-for-WriteMos.patch patches.suse/md-update-superblock-after-changing-rdev-flags-in-st.patch patches.suse/block-ataflop-fix-breakage-introduced-at-blk-mq-refa.patch + patches.suse/nvme-move-command-clear-into-the-various-setup-helpe.patch + patches.suse/nvme-don-t-memset-the-normal-read-write-command.patch patches.suse/nbd-Fix-use-after-free-in-pid_show.patch patches.suse/nvme-fc-add-support-for-map_queues.patch 
patches.suse/qla2xxx-add-map_queues-support-for-nvme.patch @@ -5401,12 +5577,65 @@ patches.suse/nvme-display-correct-subsystem-NQN.patch patches.suse/nvme-rdma-fix-error-code-in-nvme_rdma_setup_ctrl.patch patches.suse/nvme-drop-scan_lock-and-always-kick-requeue-list-whe.patch + patches.suse/dm-add-add_disk-error-handling.patch + patches.suse/bcache-add-error-handling-support-for-add_disk.patch + patches.suse/xen-blkfront-add-error-handling-support-for-add_disk.patch + patches.suse/m68k-emu-nfblock-add-error-handling-support-for-add_.patch + patches.suse/um-drivers-ubd_kern-add-error-handling-support-for-a.patch + patches.suse/mtd-add-add_disk-error-handling.patch + patches.suse/block-remove-support-for-cryptoloop-and-the-xor-tran.patch patches.suse/block-ataflop-more-blk-mq-refactoring-fixes.patch patches.suse/nvme-add-new-discovery-log-page-entry-definitions.patch patches.suse/nvmet-switch-check-for-subsystem-type.patch patches.suse/nvmet-register-discovery-subsystem-as-current.patch + patches.suse/block-ataflop-Fix-warning-comparing-pointer-to-0.patch + patches.suse/null_blk-Fix-handling-of-submit_queues-and-poll_queu.patch patches.suse/io-wq-Remove-duplicate-code-in-io_workqueue_create.patch patches.suse/block-move-the-SECTOR_SIZE-related-definitions-to-bl.patch + patches.suse/block-add-a-bdev_nr_bytes-helper.patch + patches.suse/bcache-remove-bdev_sectors.patch + patches.suse/drbd-use-bdev_nr_sectors-instead-of-open-coding-it.patch + patches.suse/dm-use-bdev_nr_sectors-and-bdev_nr_bytes-instead-of-.patch + patches.suse/md-use-bdev_nr_sectors-instead-of-open-coding-it.patch + patches.suse/nvmet-use-bdev_nr_bytes-instead-of-open-coding-it.patch + patches.suse/target-iblock-use-bdev_nr_bytes-instead-of-open-codi.patch + patches.suse/fs-use-bdev_nr_bytes-instead-of-open-coding-it-in-bl.patch + patches.suse/fs-simplify-init_page_buffers.patch + patches.suse/affs-use-bdev_nr_sectors-instead-of-open-coding-it.patch + patches.suse/btrfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch + patches.suse/cramfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch + patches.suse/fat-use-bdev_nr_sectors-instead-of-open-coding-it.patch + patches.suse/hfs-use-bdev_nr_sectors-instead-of-open-coding-it.patch + patches.suse/hfsplus-use-bdev_nr_sectors-instead-of-open-coding-i.patch + patches.suse/jfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch + patches.suse/nfs-blocklayout-use-bdev_nr_bytes-instead-of-open-co.patch + patches.suse/nilfs2-use-bdev_nr_bytes-instead-of-open-coding-it.patch + patches.suse/pstore-blk-use-bdev_nr_bytes-instead-of-open-coding-.patch + patches.suse/reiserfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch + patches.suse/squashfs-use-bdev_nr_bytes-instead-of-open-coding-it.patch + patches.suse/block-use-bdev_nr_bytes-instead-of-open-coding-it-in.patch + patches.suse/block-add-a-sb_bdev_nr_blocks-helper.patch + patches.suse/ext4-use-sb_bdev_nr_blocks.patch + patches.suse/jfs-use-sb_bdev_nr_blocks.patch + patches.suse/ntfs-use-sb_bdev_nr_blocks.patch + patches.suse/reiserfs-use-sb_bdev_nr_blocks.patch + patches.suse/udf-use-sb_bdev_nr_blocks.patch + patches.suse/block-cache-inode-size-in-bdev.patch + patches.suse/block-ioctl-use-bdev_nr_sectors-and-bdev_nr_bytes.patch + patches.suse/partitions-efi-use-bdev_nr_bytes-instead-of-open-cod.patch + patches.suse/partitions-ibm-use-bdev_nr_sectors-instead-of-open-c.patch + patches.suse/scsi-sd-add-concurrent-positioning-ranges-support.patch + patches.suse/libata-support-concurrent-positioning-ranges-log.patch + 
patches.suse/doc-document-sysfs-queue-independent_access_ranges-a.patch + patches.suse/doc-Fix-typo-in-request-queue-sysfs-documentation.patch + patches.suse/cdrom-Remove-redundant-variable-and-its-assignment.patch + patches.suse/block-add-a-get_unique_id-method.patch + patches.suse/sd-implement-get_unique_id.patch + patches.suse/nfsd-blocklayout-use-get_unique_id-instead-of-sendin.patch + patches.suse/bsg-lib-initialize-the-bsg_job-in-bsg_transport_sg_i.patch + patches.suse/scsi-add-a-scsi_alloc_request-helper.patch + patches.suse/block-remove-the-initialize_rq_fn-blk_mq_ops-method.patch + patches.suse/block-remove-QUEUE_FLAG_SCSI_PASSTHROUGH.patch patches.suse/fscrypt-allow-256-bit-master-keys-with-AES-256-XTS.patch patches.suse/lib-xz-Avoid-overlapping-memcpy-with-invalid-input-w.patch patches.suse/lib-xz-Validate-the-value-before-assigning-it-to-an-.patch @@ -5696,6 +5925,7 @@ patches.suse/x86-fpu-amx-enable-the-amx-feature-in-64-bit-mode.patch patches.suse/selftests-x86-amx-add-test-cases-for-amx-state-management.patch patches.suse/selftests-x86-amx-add-context-switch-test.patch + patches.suse/x86-fpu-Include-vmalloc.h-for-vzalloc.patch patches.suse/documentation-x86-add-documentation-for-using-dynamic-xstate-features.patch patches.suse/EDAC-amd64-Handle-three-rank-interleaving-mode.patch patches.suse/edac-sb_edac-fix-top-of-high-memory-value-for-broadwell-haswell.patch @@ -6108,6 +6338,7 @@ patches.suse/selftests-bpf-Adopt-attach_probe-selftest-to-work-on.patch patches.suse/libbpf-Refactor-and-simplify-legacy-kprobe-code.patch patches.suse/libbpf-Add-legacy-uprobe-attaching-support.patch + patches.suse/bpf-Document-BPF-licensing.patch patches.suse/bpf-Check-the-other-end-of-slot_type-for-STACK_SPILL.patch patches.suse/bpf-Support-8-byte-scalar-spill-and-refill.patch patches.suse/bpf-selftest-A-bpf-prog-that-has-a-32bit-scalar-spil.patch @@ -6523,6 +6754,7 @@ patches.suse/net-s390-constify-and-use-eth_hw_addr_set patches.suse/net-plip-use-eth_hw_addr_set.patch patches.suse/net-sb1000-rionet-use-eth_hw_addr_set.patch + patches.suse/mac80211-mesh-clean-up-rx_bcn_presp-API.patch patches.suse/mac80211-move-CRC-into-struct-ieee802_11_elems.patch patches.suse/mac80211-mlme-find-auth-challenge-directly.patch patches.suse/mac80211-always-allocate-struct-ieee802_11_elems.patch @@ -7088,6 +7320,7 @@ patches.suse/ARM-9136-1-ARMv7-M-uses-BE-8-not-BE-32.patch patches.suse/ARM-9142-1-kasan-work-around-LPAE-build-warning.patch patches.suse/iov_iter-Fix-iov_iter_get_pages-_alloc-page-fault-re.patch + patches.suse/powerpc-kvm-Fix-kvm_use_magic_page.patch patches.suse/gup-Turn-fault_in_pages_-readable-writeable-into-fault_in_-readable-writeable.patch patches.suse/gfs2-Cancel-remote-delete-work-asynchronously.patch patches.suse/gfs2-Fix-glock_hash_walk-bugs.patch @@ -7095,6 +7328,7 @@ patches.suse/xfs-fold-perag-loop-iteration-logic-into-helper-func.patch patches.suse/xfs-rename-the-next_agno-perag-iteration-variable.patch patches.suse/xfs-terminate-perag-iteration-reliably-on-agcount.patch + patches.suse/xfs-fix-perag-reference-leak-on-iteration-race-with-.patch patches.suse/xfs-punch-out-data-fork-delalloc-blocks-on-COW-write.patch patches.suse/workqueue-make-sysfs-of-unbound-kworker-cpumask-more.patch patches.suse/cgroup-Make-rebind_subsystems-disable-v2-controllers.patch @@ -8278,6 +8512,7 @@ patches.suse/scsi-mpi3mr-Use-scnprintf-instead-of-snprintf.patch patches.suse/scsi-target-cxgbit-Increase-max-DataSegmentLength.patch patches.suse/scsi-target-cxgbit-Enable-Delayed-ACK.patch + 
patches.suse/scsi-mpt3sas-Make-mpt3sas_dev_attrs-static.patch patches.suse/scsi-ufs-ufshcd-pltfrm-Fix-memory-leak-due-to-probe-defer patches.suse/scsi-lpfc-Revert-LOG_TRACE_EVENT-back-to-LOG_INIT-pr.patch patches.suse/scsi-lpfc-Wait-for-successful-restart-of-SLI3-adapte.patch @@ -8583,6 +8818,7 @@ patches.suse/libfs-Move-shmem_exchange-to-simple_rename_exchange.patch patches.suse/libfs-Support-RENAME_EXCHANGE-in-simple_rename.patch patches.suse/selftests-bpf-Convert-test_bpffs-to-ASSERT-macros.patch + patches.suse/selftests-bpf-Test-RENAME_EXCHANGE-and-RENAME_NOREPL.patch patches.suse/selftests-bpf-Make-netcnt-selftests-serial-to-avoid-.patch patches.suse/bpf-Do-not-reject-when-the-stack-read-size-is-differ.patch patches.suse/bpftool-Install-libbpf-headers-for-the-bootstrap-ver.patch @@ -8636,6 +8872,7 @@ patches.suse/apparmor-fix-error-check.patch patches.suse/thermal-int340x-fix-build-on-32-bit-targets.patch patches.suse/libata-fix-read-log-timeout-value.patch + patches.suse/libata-libahci-declare-ahci_shost_attr_group-as-stat.patch patches.suse/rtc-ds1302-Add-SPI-ID-table.patch patches.suse/rtc-ds1390-Add-SPI-ID-table.patch patches.suse/rtc-pcf2123-Add-SPI-ID-table.patch @@ -8707,6 +8944,10 @@ patches.suse/scsi-core-Remove-command-size-deduction-from-scsi_setup_scsi_cmnd patches.suse/scsi-scsi_debug-Don-t-call-kcalloc-if-size-arg-is-zero.patch patches.suse/MIPS-fix-duplicated-slashes-for-Platform-file-path.patch + patches.suse/s390-pci-refresh-function-handle-in-iomap + patches.suse/s390-pci-implement-reset_slot-for-hotplug-slot + patches.suse/PCI-Export-pci_dev_lock + patches.suse/s390-pci-implement-minimal-PCI-error-recovery patches.suse/s390-tape-fix-timer-initialization-in-tape_std_assig.patch patches.suse/s390-cpumf-cpum_cf-PMU-displays-invalid-value-after-.patch patches.suse/s390-cio-check-the-subchannel-validity-for-dev_busid.patch @@ -9313,6 +9554,7 @@ patches.suse/usb-cdnsp-Fix-a-NULL-pointer-dereference-in-cdnsp_en.patch patches.suse/x86-sme-explicitly-map-new-efi-memmap-table-as-encrypted.patch patches.suse/platform-x86-amd-pmc-Fix-s2idle-failures-on-certain-.patch + patches.suse/platform-x86-intel-hid-add-quirk-to-support-Surface-.patch patches.suse/HID-quirks-Add-quirk-for-the-Microsoft-Surface-3-typ.patch patches.suse/HID-add-hid_is_usb-function-to-make-it-simpler-for-U.patch patches.suse/HID-wacom-fix-problems-when-device-is-not-a-valid-US.patch @@ -9360,6 +9602,7 @@ patches.suse/bpf-Make-CONFIG_DEBUG_INFO_BTF-depend-upon-CONFIG_BP.patch patches.suse/bpf-Fix-bpf_check_mod_kfunc_call-for-built-in-module.patch patches.suse/tools-resolve_btfids-Skip-unresolved-symbol-warning-.patch + patches.suse/treewide-Add-missing-includes-masked-by-cgroup-bpf-d.patch patches.suse/bpf-Fix-the-off-by-two-error-in-range-markings.patch patches.suse/bpf-Add-selftests-to-cover-packet-access-corner-case.patch patches.suse/ice-fix-FDIR-init-missing-when-reset-VF.patch @@ -9745,6 +9988,7 @@ patches.suse/usb-mtu3-fix-list_head-check-warning.patch patches.suse/usb-mtu3-set-interval-of-FS-intr-and-isoc-endpoint.patch patches.suse/xhci-Fresco-FL1100-controller-should-not-have-BROKEN.patch + patches.suse/usb-typec-ucsi-Only-check-the-contract-if-there-is-a.patch patches.suse/nitro_enclaves-Use-get_user_pages_unlocked-call-to-handle-mmap-assert.patch patches.suse/net-mlx5-DR-Fix-NULL-vs-IS_ERR-checking-in-dr_domain.patch patches.suse/net-mlx5-DR-Fix-querying-eswitch-manager-vport-for-E.patch @@ -9890,6 +10134,7 @@ patches.suse/ARM-dts-BCM5301X-update-CRU-block-description.patch 
patches.suse/ARM-dts-aspeed-Add-secure-boot-controller-node.patch patches.suse/Documentation-arch-Remove-leftovers-from-raw-device.patch + patches.suse/arch-Remove-leftovers-from-prism54-wireless-driver.patch patches.suse/drivers-perf-Add-LLC-TAD-perf-counter-support.patch patches.suse/drivers-perf-marvell_cn10k-fix-an-IS_ERR-vs-NULL-check.patch patches.suse/arm64-Simplify-checking-for-populated-DT.patch @@ -10087,9 +10332,75 @@ patches.suse/media-hantro-Fix-probe-func-error-path.patch patches.suse/net-stmmac-enhance-XDP-ZC-driver-level-switching-per.patch patches.suse/net-usb-ax88179_178a-add-TSO-feature.patch + patches.suse/bpftool-Migrate-1-err-checks-of-libbpf-fn-calls.patch + patches.suse/bpftool-Use-bpf_obj_get_info_by_fd-directly.patch + patches.suse/libbpf-Detect-corrupted-ELF-symbols-section.patch + patches.suse/libbpf-Improve-sanity-checking-during-BTF-fix-up.patch + patches.suse/libbpf-Validate-that-.BTF-and-.BTF.ext-sections-cont.patch + patches.suse/libbpf-Fix-section-counting-logic.patch + patches.suse/libbpf-Improve-ELF-relo-sanitization.patch + patches.suse/libbpf-Deprecate-bpf_program__load-API.patch + patches.suse/libbpf-Fix-non-C89-loop-variable-declaration-in-gen_.patch + patches.suse/libbpf-Rename-DECLARE_LIBBPF_OPTS-into-LIBBPF_OPTS.patch + patches.suse/libbpf-Pass-number-of-prog-load-attempts-explicitly.patch + patches.suse/libbpf-Unify-low-level-BPF_PROG_LOAD-APIs-into-bpf_p.patch + patches.suse/libbpf-Remove-internal-use-of-deprecated-bpf_prog_lo.patch + patches.suse/libbpf-Stop-using-to-be-deprecated-APIs.patch + patches.suse/bpftool-Stop-using-deprecated-bpf_load_program.patch + patches.suse/libbpf-Remove-deprecation-attribute-from-struct-bpf_.patch + patches.suse/selftests-bpf-Fix-non-strict-SEC-program-sections.patch + patches.suse/selftests-bpf-Convert-legacy-prog-load-APIs-to-bpf_p.patch + patches.suse/selftests-bpf-Merge-test_stub.c-into-testing_helpers.patch + patches.suse/selftests-bpf-Use-explicit-bpf_prog_test_load-calls-.patch + patches.suse/selftests-bpf-Use-explicit-bpf_test_load_program-hel.patch + patches.suse/selftests-bpf-Pass-sanitizer-flags-to-linker-through.patch patches.suse/libbpf-Free-up-resources-used-by-inner-map-definitio.patch + patches.suse/selftests-bpf-Fix-memory-leaks-in-btf_type_c_dump-he.patch + patches.suse/selftests-bpf-Free-per-cpu-values-array-in-bpf_iter-.patch + patches.suse/selftests-bpf-Free-inner-strings-index-in-btf-selfte.patch + patches.suse/selftests-bpf-Clean-up-btf-and-btf_dump-in-dump_data.patch + patches.suse/selftests-bpf-Avoid-duplicate-btf__parse-call.patch + patches.suse/selftests-bpf-Destroy-XDP-link-correctly.patch + patches.suse/selftests-bpf-Fix-bpf_object-leak-in-skb_ctx-selftes.patch + patches.suse/bpf-Introduce-helper-bpf_find_vma.patch + patches.suse/selftests-bpf-Add-tests-for-bpf_find_vma.patch + patches.suse/selftests-bpf-Add-exception-handling-selftests-for-t.patch + patches.suse/libbpf-Compile-using-std-gnu89.patch + patches.suse/bpftool-Fix-SPDX-tag-for-Makefiles-and-.gitignore.patch + patches.suse/bpf-Add-ingress_ifindex-to-bpf_sk_lookup.patch + patches.suse/selftests-bpf-Add-tests-for-accessing-ingress_ifinde.patch + patches.suse/libbpf-Add-ability-to-get-set-per-program-load-flags.patch + patches.suse/selftests-bpf-Fix-bpf_prog_test_load-logic-to-pass-e.patch + patches.suse/bpftool-Normalize-compile-rules-to-specify-output-fi.patch + patches.suse/selftests-bpf-Minor-cleanups-and-normalization-of-Ma.patch + patches.suse/libbpf-Turn-btf_dedup_opts-into-OPTS-based-struct.patch + 
patches.suse/libbpf-Ensure-btf_dump__new-and-btf_dump_opts-are-fu.patch + patches.suse/libbpf-Make-perf_buffer__new-use-OPTS-based-interfac.patch + patches.suse/selftests-bpf-Migrate-all-deprecated-perf_buffer-use.patch + patches.suse/selftests-bpf-Update-btf_dump__new-uses-to-v1.0-vari.patch + patches.suse/tools-runqslower-Update-perf_buffer__new-calls.patch + patches.suse/bpftool-Update-btf_dump__new-and-perf_buffer__new_ra.patch + patches.suse/bpf-Support-BTF_KIND_TYPE_TAG-for-btf_type_tag-attri.patch + patches.suse/libbpf-Support-BTF_KIND_TYPE_TAG.patch + patches.suse/bpftool-Support-BTF_KIND_TYPE_TAG.patch + patches.suse/selftests-bpf-Test-libbpf-API-function-btf__add_type.patch + patches.suse/selftests-bpf-Add-BTF_KIND_TYPE_TAG-unit-tests.patch + patches.suse/selftests-bpf-Test-BTF_KIND_DECL_TAG-for-deduplicati.patch + patches.suse/selftests-bpf-Rename-progs-tag.c-to-progs-btf_decl_t.patch + patches.suse/selftests-bpf-Add-a-C-test-for-btf_type_tag.patch + patches.suse/selftests-bpf-Clarify-llvm-dependency-with-btf_tag-s.patch + patches.suse/docs-bpf-Update-documentation-for-BTF_KIND_TYPE_TAG-.patch + patches.suse/bpftool-Enable-libbpf-s-strict-mode-by-default.patch + patches.suse/bpf-Extend-BTF_ID_LIST_GLOBAL-with-parameter-for-num.patch + patches.suse/bpf-Introduce-btf_tracing_ids.patch + patches.suse/selftests-bpf-Fix-an-unused-but-set-variable-compile.patch + patches.suse/selftests-bpf-Fix-a-tautological-constant-out-of-ran.patch patches.suse/bpftool-Fix-memory-leak-in-prog_dump.patch patches.suse/bpftool-Remove-inclusion-of-utilities.mak-from-Makef.patch + patches.suse/bpftool-Fix-indent-in-option-lists-in-the-documentat.patch + patches.suse/bpftool-Update-the-lists-of-names-for-maps-and-prog-.patch + patches.suse/bpftool-Fix-mixed-indentation-in-documentation.patch + patches.suse/bpftool-Use-libbpf_get_error-to-check-error.patch patches.suse/tcp-small-optimization-in-tcp-recvmsg.patch patches.suse/tcp-avoid-indirect-calls-to-sock_rfree.patch patches.suse/tcp-defer-skb-freeing-after-socket-lock-is-released.patch @@ -10160,8 +10471,10 @@ patches.suse/net-annotate-accesses-to-dev-gso_max_size.patch patches.suse/net-annotate-accesses-to-dev-gso_max_segs.patch patches.suse/msft-hv-2484-net-mana-Add-XDP-support.patch + patches.suse/pcmcia-hide-the-MAC-address-helpers-if-NET.patch patches.suse/qed-Use-the-bitmap-API-to-simplify-some-functions.patch patches.suse/msft-hv-2485-hv_netvsc-Use-bitmap_zalloc-when-applicable.patch + patches.suse/skbuff-Move-conditional-preprocessor-directives-out-.patch patches.suse/mlxsw-spectrum_router-Remove-deadcode-in-mlxsw_sp_ri.patch patches.suse/devlink-Add-enable_iwarp-generic-device-param.patch patches.suse/net-ice-Add-support-for-enable_iwarp-and-enable_roce.patch @@ -10247,9 +10560,119 @@ patches.suse/net-huawei-hinic-Use-devm_kcalloc-instead-of-devm_kz.patch patches.suse/net-phy-prefer-1000baseT-over-1000baseKX.patch patches.suse/xfrm-add-net-device-refcount-tracker-to-struct-xfrm_.patch + patches.suse/bpftool-Add-current-libbpf_strict-mode-to-version-ou.patch + patches.suse/libbpf-Fix-a-couple-of-missed-btf_type_tag-handling-.patch + patches.suse/selftests-bpf-Add-a-dedup-selftest-with-equivalent-s.patch + patches.suse/bpftool-Add-SPDX-tags-to-RST-documentation-files.patch + patches.suse/bpftool-Update-doc-use-susbtitutions-and-test_bpftoo.patch + patches.suse/selftests-bpf-Configure-dir-paths-via-env-in-test_bp.patch + patches.suse/bpf-Change-value-of-MAX_TAIL_CALL_CNT-from-32-to-33.patch + 
patches.suse/selftests-bpf-Add-uprobe-triggering-overhead-benchma.patch + patches.suse/selftests-bpf-Move-summary-line-after-the-error-logs.patch + patches.suse/selftests-bpf-Variable-naming-fix.patch + patches.suse/selftests-bpf-Mark-variable-as-static.patch + patches.suse/bpf-docs-Change-underline-in-btf-to-match-style-guid.patch + patches.suse/bpf-docs-Rename-bpf_lsm.rst-to-prog_lsm.rst.patch + patches.suse/bpf-docs-Fix-ordering-of-bpf-documentation.patch + patches.suse/selftests-bpf-Fix-xdpxceiver-failures-for-no-hugepag.patch + patches.suse/selfetests-bpf-Adapt-vmtest.sh-to-s390-libbpf-CI-cha.patch + patches.suse/libbpf-Add-runtime-APIs-to-query-libbpf-version.patch + patches.suse/libbpf-Accommodate-DWARF-compiler-bug-with-duplicate.patch + patches.suse/selftests-bpf-Add-btf_dedup-case-with-duplicated-str.patch + patches.suse/libbpf-Change-bpf_program__set_extra_flags-to-bpf_pr.patch + patches.suse/selftests-bpf-Fix-trivial-typo.patch + patches.suse/libbpf-Load-global-data-maps-lazily-on-legacy-kernel.patch + patches.suse/selftests-bpf-Mix-legacy-maps-and-modern-vars-BPF-in.patch + patches.suse/libbpf-Unify-low-level-map-creation-APIs-w-new-bpf_m.patch + patches.suse/libbpf-Use-bpf_map_create-consistently-internally.patch + patches.suse/libbpf-Prevent-deprecation-warnings-in-xsk.c.patch + patches.suse/selftests-bpf-Migrate-selftests-to-bpf_map_create.patch + patches.suse/tools-resolve_btf_ids-Close-ELF-file-on-error.patch + patches.suse/libbpf-Fix-potential-misaligned-memory-access-in-btf.patch + patches.suse/libbpf-Don-t-call-libc-APIs-with-NULL-pointers.patch + patches.suse/libbpf-Fix-glob_syms-memory-leak-in-bpf_linker.patch + patches.suse/libbpf-Fix-using-invalidated-memory-in-bpf_linker.patch + patches.suse/selftests-bpf-Fix-UBSan-complaint-about-signed-__int.patch + patches.suse/selftests-bpf-Fix-possible-NULL-passed-to-memcpy-wit.patch + patches.suse/selftests-bpf-Prevent-misaligned-memory-access-in-ge.patch + patches.suse/selftests-bpf-Fix-misaligned-memory-access-in-queue_.patch + patches.suse/selftests-bpf-Prevent-out-of-bounds-stack-access-in-.patch + patches.suse/selftests-bpf-Fix-misaligned-memory-accesses-in-xdp_.patch + patches.suse/selftests-bpf-Fix-misaligned-accesses-in-xdp-and-xdp.patch + patches.suse/bpf-mips-Fix-build-errors-about-__NR_bpf-undeclared.patch + patches.suse/libbpf-Support-static-initialization-of-BPF_MAP_TYPE.patch + patches.suse/selftests-bpf-Test-BPF_MAP_TYPE_PROG_ARRAY-static-in.patch + patches.suse/libbpf-Silence-uninitialized-warning-error-in-btf_du.patch + patches.suse/libbpf-Remove-duplicate-assignments.patch + patches.suse/x86-bpf-Cleanup-the-top-of-file-header-in-bpf_jit_co.patch + patches.suse/bpf-Remove-a-redundant-comment-on-bpf_prog_free.patch + patches.suse/bpf-docs-Prune-all-references-to-internal-BPF.patch + patches.suse/bpf-docs-Move-handling-of-maps-to-Documentation-bpf-.patch + patches.suse/bpf-docs-Split-general-purpose-eBPF-documentation-ou.patch + patches.suse/bpf-Add-bpf_loop-helper.patch + patches.suse/selftests-bpf-Add-bpf_loop-test.patch + patches.suse/selftests-bpf-Measure-bpf_loop-verifier-performance.patch + patches.suse/selftest-bpf-benchs-Add-bpf_loop-benchmark.patch + patches.suse/bpf-Change-bpf_kallsyms_lookup_name-size-type-to-ARG.patch + patches.suse/libbpf-Avoid-double-stores-for-success-failure-case-.patch + patches.suse/libbpf-Avoid-reload-of-imm-for-weak-unresolved-repea.patch + patches.suse/bpf-Clean-up-bpf_verifier_vlog-for-BPF_LOG_KERNEL-lo.patch + 
patches.suse/samples-bpf-Fix-conflicting-types-in-fds_example.patch + patches.suse/libbpf-Replace-btf__type_by_id-with-btf_type_by_id.patch + patches.suse/bpf-Rename-btf_member-accessors.patch + patches.suse/bpf-Prepare-relo_core.c-for-kernel-duty.patch + patches.suse/bpf-Define-enum-bpf_core_relo_kind-as-uapi.patch + patches.suse/bpf-Pass-a-set-of-bpf_core_relo-s-to-prog_load-comma.patch patches.suse/bpf-Adjust-BTF-log-size-limit.patch + patches.suse/libbpf-Cleanup-struct-bpf_core_cand.patch + patches.suse/bpf-Add-bpf_core_add_cands-and-wire-it-into-bpf_core.patch + patches.suse/libbpf-Use-CO-RE-in-the-kernel-in-light-skeleton.patch + patches.suse/libbpf-Support-init-of-inner-maps-in-light-skeleton.patch + patches.suse/libbpf-Clean-gen_loader-s-attach-kind.patch + patches.suse/selftests-bpf-Add-lskel-version-of-kfunc-test.patch + patches.suse/selftests-bpf-Improve-inner_map-test-coverage.patch + patches.suse/selftests-bpf-Convert-map_ptr_kern-test-to-use-light.patch + patches.suse/selftests-bpf-Additional-test-for-CO-RE-in-the-kerne.patch + patches.suse/selftests-bpf-Revert-CO-RE-removal-in-test_ksyms_wea.patch + patches.suse/selftests-bpf-Add-CO-RE-relocations-to-verifier-scal.patch + patches.suse/selftests-bpf-Build-testing_helpers.o-out-of-tree.patch + patches.suse/selftests-bpf-Update-test-names-for-xchg-and-cmpxchg.patch + patches.suse/libbpf-Use-__u32-fields-in-bpf_map_create_opts.patch + patches.suse/libbpf-Add-API-to-get-set-log_level-at-per-program-l.patch + patches.suse/bpftool-Migrate-off-of-deprecated-bpf_create_map_xat.patch + patches.suse/selftests-bpf-Remove-recently-reintroduced-legacy-bt.patch + patches.suse/selftests-bpf-Mute-xdpxceiver.c-s-deprecation-warnin.patch + patches.suse/selftests-bpf-Remove-all-the-uses-of-deprecated-bpf_.patch + patches.suse/samples-bpf-Clean-up-samples-bpf-build-failes.patch + patches.suse/samples-bpf-Get-rid-of-deprecated-libbpf-API-uses.patch + patches.suse/libbpf-Deprecate-bpf_prog_load_xattr-API.patch + patches.suse/libbpf-Reduce-bpf_core_apply_relo_insn-stack-usage.patch + patches.suse/bpf-Fix-the-test_task_vma-selftest-to-support-output.patch patches.suse/bpf-Disallow-BPF_LOG_KERNEL-log-level-for-bpf-BPF_BTF_LOAD.patch + patches.suse/bpftool-Add-debug-mode-for-gen_loader.patch patches.suse/bpf-Remove-config-check-to-enable-bpf-support-for-branch-records.patch + patches.suse/libbpf-Fix-trivial-typo.patch + patches.suse/libbpf-Add-doc-comments-in-libbpf.h.patch + patches.suse/bpf-Silence-purge_cand_cache-build-warning.patch + patches.suse/samples-bpf-Fix-xdp_sample_user.o-linking-with-Clang.patch + patches.suse/samples-bpf-Fix-unknown-warning-group-build-warning-.patch + patches.suse/selftests-bpf-Fix-a-compilation-warning.patch + patches.suse/bpf-Remove-redundant-assignment-to-pointer-t.patch + patches.suse/samples-bpf-Remove-unneeded-variable.patch + patches.suse/libbpf-Fix-bpf_prog_load-log_buf-logic-for-log_level.patch + patches.suse/libbpf-Add-OPTS-based-bpf_btf_load-API.patch + patches.suse/libbpf-Allow-passing-preallocated-log_buf-when-loadi.patch + patches.suse/libbpf-Allow-passing-user-log-setting-through-bpf_ob.patch + patches.suse/libbpf-Improve-logging-around-BPF-program-loading.patch + patches.suse/libbpf-Preserve-kernel-error-code-and-remove-kprobe-.patch + patches.suse/libbpf-Add-per-program-log-buffer-setter-and-getter.patch + patches.suse/libbpf-Deprecate-bpf_object__load_xattr.patch + patches.suse/selftests-bpf-Replace-all-uses-of-bpf_load_btf-with-.patch + 
patches.suse/selftests-bpf-Add-test-for-libbpf-s-custom-log_buf-b.patch + patches.suse/selftests-bpf-Remove-the-only-use-of-deprecated-bpf_.patch + patches.suse/bpftool-Switch-bpf_object__load_xattr-to-bpf_object_.patch + patches.suse/libbpf-Fix-typo-in-btf__dedup-LIBBPF_0.0.2-definitio.patch + patches.suse/libbpf-Add-bool-skipped-to-struct-bpf_map.patch patches.suse/net-bna-Update-supported-link-modes.patch patches.suse/u64_stats-Disable-preemption-on-32bit-UP-SMP-PREEMPT.patch patches.suse/ARM-dts-qcom-sdx55-fix-IPA-interconnect-definitions.patch @@ -10541,9 +10964,49 @@ patches.suse/net-mlx5-DR-Improve-steering-for-empty-or-RX-TX-only.patch patches.suse/net-mlx5-DR-Ignore-modify-TTL-if-device-doesn-t-supp.patch patches.suse/net-mlx5-Set-SMFS-as-a-default-steering-mode-if-devi.patch + patches.suse/libbpf-Fix-gen_loader-assumption-on-number-of-progra.patch + patches.suse/bpf-Add-bpf_strncmp-helper.patch + patches.suse/selftests-bpf-Fix-checkpatch-error-on-empty-function.patch + patches.suse/selftests-bpf-Add-benchmark-for-bpf_strncmp-helper.patch + patches.suse/selftests-bpf-Add-test-cases-for-bpf_strncmp.patch + patches.suse/bpf-Use-kmemdup-to-replace-kmalloc-memcpy.patch + patches.suse/bpf-Silence-coverity-false-positive-warning.patch + patches.suse/selftests-bpf-Remove-last-bpf_create_map_xattr-from-.patch + patches.suse/libbpf-Don-t-validate-TYPE_ID-relo-s-original-imm-va.patch + patches.suse/bpf-Allow-access-to-int-pointer-arguments-in-tracing.patch + patches.suse/selftests-bpf-Add-test-to-access-int-ptr-argument-in.patch + patches.suse/bpf-x64-Replace-some-stack_size-usage-with-offset-va.patch + patches.suse/bpf-Add-get_func_-arg-ret-arg_cnt-helpers.patch + patches.suse/selftests-bpf-Add-tests-for-get_func_-arg-ret-arg_cn.patch patches.suse/bpf-Do-not-WARN-in-bpf_warn_invalid_xdp_action.patch patches.suse/bpf-Let-bpf_warn_invalid_xdp_action-report-more-info.patch + patches.suse/selftests-bpf-Fix-segfault-in-bpf_tcp_ca.patch + patches.suse/libbpf-Add-doc-comments-for-bpf_program__-un-pin.patch patches.suse/xsk-Wipe-out-dead-zero_copy_allocator-declarations.patch + patches.suse/libbpf-Fix-potential-uninit-memory-read.patch + patches.suse/libbpf-Add-sane-strncpy-alternative-and-use-it-inter.patch + patches.suse/libbpf-Auto-bump-RLIMIT_MEMLOCK-if-kernel-needs-it-f.patch + patches.suse/selftests-bpf-Remove-explicit-setrlimit-RLIMIT_MEMLO.patch + patches.suse/selftests-bpf-Stop-using-bpf_object__find_program_by.patch + patches.suse/samples-bpf-Stop-using-bpf_object__find_program_by_t.patch + patches.suse/tools-perf-Stop-using-bpf_object__find_program_by_ti.patch + patches.suse/libbpf-Mark-bpf_object__find_program_by_title-API-de.patch + patches.suse/libbpf-Avoid-reading-past-ELF-data-section-end-when-.patch + patches.suse/tools-Help-cross-building-with-clang.patch + patches.suse/tools-resolve_btfids-Support-cross-building-the-kern.patch + patches.suse/tools-libbpf-Enable-cross-building-with-clang.patch + patches.suse/bpftool-Enable-cross-building-with-clang.patch + patches.suse/tools-runqslower-Enable-cross-building-with-clang.patch + patches.suse/selftests-bpf-Enable-cross-building-with-clang.patch + patches.suse/add-includes-masked-by-cgroup-bpf-dependency.patch + patches.suse/add-missing-bpf-cgroup.h-includes.patch + patches.suse/bpf-Remove-the-cgroup-bpf-header-dependecy.patch + patches.suse/bpf-Only-print-scratched-registers-and-stack-slots-t.patch + patches.suse/bpf-Right-align-verifier-states-in-verifier-logs.patch + 
patches.suse/Only-output-backtracking-information-in-log-level-2.patch + patches.suse/libbpf-Rework-feature-probing-APIs.patch + patches.suse/selftests-bpf-Add-libbpf-feature-probing-API-selftes.patch + patches.suse/bpftool-Reimplement-large-insn-size-limit-feature-pr.patch patches.suse/bpf-Introduce-composable-reg-ret-and-arg-types.patch patches.suse/bpf-Replace-ARG_XXX_OR_NULL-with-ARG_XXX-PTR_MAYBE_N.patch patches.suse/bpf-Replace-RET_XXX_OR_NULL-with-RET_XXX-PTR_MAYBE_N.patch @@ -10553,7 +11016,26 @@ patches.suse/bpf-Make-per_cpu_ptr-return-rdonly-PTR_TO_MEM.patch patches.suse/bpf-Add-MEM_RDONLY-for-helper-args-that-are-pointers.patch patches.suse/bpf-selftests-Test-PTR_TO_RDONLY_MEM.patch + patches.suse/bpf-Extend-kfunc-with-PTR_TO_CTX-PTR_TO_MEM-argument.patch + patches.suse/selftests-bpf-Correct-the-INDEX-address-in-vmtest.sh.patch + patches.suse/bpf-Use-struct_size-helper.patch + patches.suse/bpftool-Enable-line-buffering-for-stdout.patch + patches.suse/libbpf-Do-not-use-btf_dump__new-macro-in-C-mode.patch + patches.suse/selftests-bpf-Add-btf_dump__new-to-test_cpp.patch + patches.suse/libbpf-Normalize-PT_REGS_xxx-macro-definitions.patch + patches.suse/libbpf-Use-100-character-limit-to-make-bpf_tracing.h.patch + patches.suse/libbpf-Improve-LINUX_VERSION_CODE-detection.patch patches.suse/net-Don-t-include-filter.h-from-net-sock.h.patch + patches.suse/bpf-Add-missing-map_get_next_key-method-to-bloom-fil.patch + patches.suse/bpf-Allow-bpf_local_storage-to-be-used-by-sleepable-.patch + patches.suse/bpf-selftests-Update-local-storage-selftest-for-slee.patch + patches.suse/net-Add-includes-masked-by-netdevice.h-including-uap.patch + patches.suse/bpf-Invert-the-dependency-between-bpf-netns.h-and-ne.patch + patches.suse/bpf-docs-Fix-verifier-references.patch + patches.suse/bpf-docs-Split-the-comparism-to-classic-BPF-from-ins.patch + patches.suse/bpf-docs-Generate-nicer-tables-for-instruction-encod.patch + patches.suse/bpf-docs-Move-the-packet-access-instructions-last-in.patch + patches.suse/bpf-Fix-typo-in-a-comment-in-bpf-lpm_trie.patch patches.suse/net-smc-remove-redundant-re-assignment-of-pointer-link patches.suse/qed-Use-dma_set_mask_and_coherent-and-simplify-code.patch patches.suse/batman-adv-allow-netlink-usage-in-unprivileged-conta.patch @@ -10567,6 +11049,47 @@ patches.suse/gro-add-ability-to-control-gro-max-packet-size.patch patches.suse/sfc-Use-swap-instead-of-open-coding-it.patch patches.suse/veth-Do-not-record-rx-queue-hint-in-veth_xmit.patch + patches.suse/bpftool-Refactor-misc.-feature-probe.patch + patches.suse/bpftool-Probe-for-bounded-loop-support.patch + patches.suse/bpftool-Probe-for-instruction-set-extensions.patch + patches.suse/bpf-selftests-Fix-namespace-mount-setup-in-tc_redire.patch + patches.suse/bpf-arm64-Use-emit_addr_mov_i64-for-BPF_PSEUDO_FUNC.patch + patches.suse/bpf-sockmap-Fix-return-codes-from-tcp_bpf_recvmsg_pa.patch + patches.suse/bpf-sockmap-Fix-double-bpf_prog_put-on-error-case-in.patch + patches.suse/bpf-Don-t-promote-bogus-looking-registers-after-null.patch + patches.suse/bpf-selftests-Add-verifier-test-for-mem_or_null-regi.patch + patches.suse/bpf-docs-Add-a-setion-to-explain-the-basic-instructi.patch + patches.suse/bpf-docs-Add-subsections-for-ALU-and-JMP-instruction.patch + patches.suse/bpf-docs-Document-the-opcode-classes.patch + patches.suse/bpf-docs-Fully-document-the-ALU-opcodes.patch + patches.suse/bpf-docs-Fully-document-the-JMP-opcodes.patch + patches.suse/bpf-docs-Fully-document-the-JMP-mode-modifiers.patch + 
patches.suse/bpf-Fix-verifier-support-for-validation-of-async-cal.patch + patches.suse/bpf-Fix-SO_RCVBUF-SO_SNDBUF-handling-in-_bpf_setsock.patch + patches.suse/bpf-Add-SO_RCVBUF-SO_SNDBUF-in-_bpf_getsockopt.patch + patches.suse/libbpf-Deprecate-bpf_perf_event_read_simple-API.patch + patches.suse/libbpf-Use-probe_name-for-legacy-kprobe.patch + patches.suse/libbpf-Support-repeated-legacy-kprobes-on-same-funct.patch + patches.suse/libbpf-1.0-Deprecate-bpf_map__is_offload_neutral.patch + patches.suse/libbpf-1.0-Deprecate-bpf_object__find_map_by_offset-.patch + patches.suse/samples-bpf-xdpsock-Add-VLAN-support-for-Tx-only-ope.patch + patches.suse/samples-bpf-xdpsock-Add-Dest-and-Src-MAC-setting-for.patch + patches.suse/samples-bpf-xdpsock-Add-clockid-selection-support.patch + patches.suse/samples-bpf-xdpsock-Add-cyclic-TX-operation-capabili.patch + patches.suse/samples-bpf-xdpsock-Add-sched-policy-and-priority-su.patch + patches.suse/samples-bpf-xdpsock-Add-time-out-for-cleaning-Tx.patch + patches.suse/samples-bpf-xdpsock-Add-timestamp-for-Tx-only-operat.patch + patches.suse/xdp-Allow-registering-memory-model-without-rxq-refer.patch + patches.suse/page_pool-Add-callback-to-init-pages-when-they-are-a.patch + patches.suse/page_pool-Store-the-XDP-mem-id.patch + patches.suse/xdp-Move-conversion-to-xdp_frame-out-of-map-function.patch + patches.suse/xdp-Add-xdp_do_redirect_frame-for-pre-computed-xdp_f.patch + patches.suse/selftests-bpf-Don-t-rely-on-preserving-volatile-in-P.patch + patches.suse/libbpf-Add-documentation-for-bpf_map-batch-operation.patch + patches.suse/bpf-selftests-Test-bpf_d_path-on-rdonly_mem.patch + patches.suse/net-bpf-Handle-return-value-of-BPF_CGROUP_RUN_PROG_I.patch + patches.suse/bpf-selftests-Use-C99-initializers-in-test_sock.c.patch + patches.suse/bpf-selftests-Add-bind-retry-for-post_bind-4-6.patch patches.suse/mlxsw-Rename-virtual-router-flex-key-element.patch patches.suse/mlxsw-Introduce-flex-key-elements-for-Spectrum-4.patch patches.suse/mlxsw-spectrum_acl_bloom_filter-Reorder-functions-to.patch @@ -11062,6 +11585,8 @@ patches.suse/genirq-Provide-new-interfaces-for-affinity-hints.patch patches.suse/iavf-Use-irq_update_affinity_hint.patch patches.suse/i40e-Use-irq_update_affinity_hint.patch + patches.suse/scsi-megaraid_sas-Use-irq_set_affinity_and_hint.patch + patches.suse/scsi-mpt3sas-Use-irq_set_affinity_and_hint.patch patches.suse/RDMA-irdma-Use-irq_update_affinity_hint.patch patches.suse/be2net-Use-irq_update_affinity_hint.patch patches.suse/ixgbe-Use-irq_update_affinity_hint.patch @@ -11126,6 +11651,7 @@ patches.suse/scsi-pm80xx-Update-WARN_ON-check-in-pm8001_mpi_build_cmd patches.suse/scsi-qedi-Remove-set-but-unused-page-variable.patch patches.suse/scsi-core-Show-SCMD_LAST-in-text-form + patches.suse/scsi-megaraid-Fix-a-kernel-doc-warning.patch patches.suse/scsi-pm8001-Fix-kernel-doc-warnings patches.suse/scsi-pmcraid-Fix-a-kernel-doc-warning.patch patches.suse/scsi-qedi-Fix-SYSFS_FLAG_FW_SEL_BOOT-formatting.patch @@ -11144,6 +11670,7 @@ patches.suse/scsi-lpfc-Update-lpfc-version-to-14.0.0.4.patch patches.suse/scsi-lpfc-Use-struct_group-to-initialize-struct-lpfc.patch patches.suse/scsi-lpfc-Use-struct_group-to-isolate-cast-to-larger.patch + patches.suse/scsi-hpsa-Remove-an-unused-variable-in-hpsa_update_s.patch patches.suse/qla2xxx-synchronize-rport-dev_loss_tmo-setting.patch patches.suse/scsi-libsas-Don-t-always-drain-event-workqueue-for-HA-resume.patch patches.suse/scsi-Revert-scsi-hisi_sas-Filter-out-new-PHY-up-events-during-suspend @@ -11468,6 +11995,14 @@ 
patches.suse/powerpc-xive-Change-the-debugfs-file-xive-into-a-dir.patch patches.suse/powerpc-xive-Rename-the-cpus-debugfs-file-to-ipis.patch patches.suse/powerpc-xive-Add-a-debugfs-file-to-dump-EQs.patch + patches.suse/bpf-powerpc-Remove-unused-SEEN_STACK.patch + patches.suse/bpf-powerpc-Remove-extra_pass-from-bpf_jit_build_bod.patch + patches.suse/bpf-powerpc-refactor-JIT-compiler-code.patch + patches.suse/powerpc-ppc-opcode-introduce-PPC_RAW_BRANCH-macro.patch + patches.suse/bpf-ppc64-Add-BPF_PROBE_MEM-support-for-JIT.patch + patches.suse/bpf-ppc64-Access-only-if-addr-is-kernel-address.patch + patches.suse/bpf-ppc32-Add-BPF_PROBE_MEM-support-for-JIT.patch + patches.suse/bpf-ppc32-Access-only-if-addr-is-kernel-address.patch patches.suse/powerpc-prom_init-Fix-improper-check-of-prom_getprop.patch patches.suse/powerpc-watchdog-Fix-missed-watchdog-reset-due-to-me.patch patches.suse/powerpc-watchdog-tighten-non-atomic-read-modify-writ.patch @@ -11645,6 +12180,9 @@ patches.suse/KVM-x86-Handle-32-bit-wrap-of-EIP-for-EMULTYPE_SKIP-.patch patches.suse/KVM-x86-Exit-to-userspace-if-emulation-prepared-a-co.patch patches.suse/KVM-nVMX-Ensure-vCPU-honors-event-request-if-posting.patch + patches.suse/KVM-s390-gaccess-Refactor-gpa-and-length-calculation + patches.suse/KVM-s390-gaccess-Refactor-access-address-range-check + patches.suse/KVM-s390-gaccess-Cleanup-access-to-guest-pages patches.suse/KVM-s390-Clarify-SIGP-orders-versus-STOP-RESTART patches.suse/KVM-arm64-Drop-unused-workaround_flags-vcpu-field.patch patches.suse/selftests-KVM-sev_migrate_tests-Fix-sev_ioctl.patch @@ -11746,6 +12284,8 @@ patches.suse/dmaengine-at_xdmac-Fix-at_xdmac_lld-struct-definitio.patch patches.suse/random-fix-typo-in-comments.patch patches.suse/f2fs-fix-to-do-sanity-check-on-inode-type-during-gar.patch + patches.suse/samples-bpf-test_overhead_kprobe_kern-replace-bpf_pr.patch + patches.suse/tools-bpf-bpftool-skeleton-replace-bpf_probe_read_ke.patch patches.suse/list-introduce-list_is_head-helper-and-re-use-it-in-.patch patches.suse/hash.h-remove-unused-define-directive.patch patches.suse/libcxgb-Don-t-accidentally-set-RTO_ONLINK-in-cxgb_fi.patch @@ -11766,11 +12306,18 @@ patches.suse/net-tls-Fix-another-skb-memory-leak-when-running-kTL.patch patches.suse/net-Flush-deferred-skb-free-on-socket-destroy.patch patches.suse/net-sfp-fix-high-power-modules-without-diagnostic-mo.patch + patches.suse/bpf-Fix-mount-source-show-for-bpffs.patch + patches.suse/xdp-check-prog-type-before-updating-BPF-link.patch + patches.suse/bpf-selftests-convert-xdp_link-test-to-ASSERT_-macro.patch + patches.suse/bpf-selftests-Add-check-for-updating-XDP-bpf_link-wi.patch + patches.suse/bpf-Fix-incorrect-integer-literal-used-for-marking-s.patch patches.suse/bpf-Generalize-check_ctx_reg-for-reuse-with-other-ty.patch patches.suse/bpf-Mark-PTR_TO_FUNC-register-initially-with-zero-of.patch patches.suse/bpf-Generally-fix-helper-register-offset-check.patch patches.suse/bpf-Fix-out-of-bounds-access-for-ringbuf-helpers.patch + patches.suse/bpf-Fix-ringbuf-memory-type-confusion-when-passing-t.patch patches.suse/bpf-selftests-Add-various-ringbuf-tests-with-invalid.patch + patches.suse/bpf-selftests-Add-ringbuf-memory-type-confusion-test.patch patches.suse/net-axienet-increase-reset-timeout.patch patches.suse/net-axienet-Wait-for-PhyRstCmplt-after-core-reset.patch patches.suse/net-axienet-reset-core-on-initialization-prior-to-MD.patch @@ -11793,6 +12340,7 @@ patches.suse/gpio-mpc8xxx-Fix-an-ignored-error-return-from-platfo.patch 
patches.suse/s390-cpumf-Support-for-CPU-Measurement-Facility-CSVN-7
patches.suse/s390-cpumf-Support-for-CPU-Measurement-Sampling-Facility-LS-bit
+ patches.suse/s390-uaccess-introduce-bit-field-for-OAC-specifier
patches.suse/x86-gpu-Reserve-stolen-memory-for-first-integrated-I.patch
patches.suse/clk-si5341-Fix-clock-HW-provider-cleanup.patch
patches.suse/drm-i915-display-ehl-Update-voltage-swing-table.patch
@@ -11835,6 +12383,7 @@
patches.suse/ACPI-CPPC-Drop-redundant-local-variable-from-cpc_rea.patch
patches.suse/ACPI-DPTF-Support-Raptor-Lake.patch
patches.suse/scsi-qedf-Fix-potential-dereference-of-NULL-pointer
+ patches.suse/scsi-mpt3sas-Update-persistent-trigger-pages-from-sy.patch
patches.suse/scsi-mpi3mr-Fix-some-spelling-mistakes.patch
patches.suse/scsi-mpi3mr-Fix-formatting-problems-in-some-kernel-doc-comments.patch
patches.suse/scsi-hisi_sas-Remove-unused-variable-and-check-in-hisi_sas_send_ata_reset_each_phy
@@ -11876,7 +12425,11 @@
patches.suse/perf-x86-intel-uncore-Add-IMC-uncore-support-for-ADL.patch
patches.suse/psi-Fix-uaf-issue-when-psi-trigger-is-destroyed-whil.patch
patches.suse/sched-pelt-Relax-the-sync-of-util_sum-with-util_avg.patch
+ patches.suse/bpf-Guard-against-accessing-NULL-pt_regs-in-bpf_get_.patch
+ patches.suse/powerpc32-bpf-Fix-codegen-for-bpf-to-bpf-calls.patch
patches.suse/powerpc-bpf-Update-ldimm64-instructions-during-extra.patch
+ patches.suse/tools-bpf-Rename-struct-event-to-avoid-naming-confli.patch
+ patches.suse/powerpc64-bpf-Limit-ldbrx-to-processors-compliant-wi.patch
patches.suse/powerpc-64s-Mask-SRR0-before-checking-against-the-ma.patch
patches.suse/ARM-9170-1-fix-panic-when-kasan-and-kprobe-are-enabl.patch
patches.suse/ARM-9180-1-Thumb2-align-ALT_UP-sections-in-modules-s.patch
@@ -11962,6 +12515,7 @@
patches.suse/ceph-set-pool_ns-in-new-inode-layout-for-async-creates.patch
patches.suse/s390-nmi-handle-guarded-storage-validity-failures-for-KVM-guests
patches.suse/s390-nmi-handle-vector-validity-failures-for-KVM-guests
+ patches.suse/s390-uaccess-fix-compile-error
patches.suse/s390-module-fix-loading-modules-with-a-lot-of-relocations
patches.suse/s390-hypfs-include-z-VM-guests-with-access-control-group-set
patches.suse/KVM-selftests-Re-enable-access_tracking_perf_test.patch
@@ -12078,6 +12632,7 @@
patches.suse/net-ieee802154-mcr20a-Fix-lifs-sifs-periods.patch
patches.suse/net-ieee802154-ca8210-Stop-leaking-skb-s.patch
patches.suse/net-stmmac-properly-handle-with-runtime-pm-in-stmmac.patch
+ patches.suse/net-smc-Forward-wakeup-to-smc-socket-waitqueue-after-fallback
patches.suse/net-dsa-mt7530-make-NET_DSA_MT7530-select-MEDIATEK_G.patch
patches.suse/net-stmmac-dump-gmac4-DMA-registers-correctly.patch
patches.suse/net-macsec-Fix-offload-support-for-NETDEV_UNREGISTER.patch
@@ -12106,6 +12661,11 @@
patches.suse/net-mlx5e-Avoid-implicit-modify-hdr-for-decap-drop-r.patch
patches.suse/net-mlx5e-Use-struct_group-for-memcpy-region.patch
patches.suse/net-mlx5e-Avoid-field-overflowing-memcpy.patch
+ patches.suse/bpf-Fix-renaming-task_getsecid_subj-current_getsecid.patch
+ patches.suse/bpf-Fix-possible-race-in-inc_misses_counter.patch
+ patches.suse/tools-headers-UAPI-remove-stale-lirc.h.patch
+ patches.suse/bpf-Use-VM_MAP-instead-of-VM_ALLOC-for-ringbuf.patch
+ patches.suse/tools-resolve_btfids-Do-not-print-any-commands-when-.patch
patches.suse/net-stmmac-ensure-PTP-time-register-reads-are-consis.patch
patches.suse/ax25-fix-reference-count-leaks-of-ax25_dev.patch
patches.suse/Fix-a-warning-about-a-malformed-kernel-doc-comment-in-cifs.patch
@@ -12301,6 +12861,7 @@
patches.suse/usb-gadget-f_uac2-Define-specific-wTerminalType.patch
patches.suse/usb-dwc3-gadget-Prevent-core-from-processing-stale-T.patch
patches.suse/net-usb-ax88179_178a-Fix-out-of-bounds-accesses-in-R.patch
+ patches.suse/usb-core-Unregister-device-on-component_add-failure.patch
patches.suse/USB-gadget-validate-interface-OS-descriptor-requests.patch
patches.suse/usb-gadget-rndis-check-size-of-RNDIS_MSG_SET-command.patch
patches.suse/usb-dwc2-drd-fix-soft-connect-when-gadget-is-unconfi.patch
@@ -12338,6 +12899,7 @@
patches.suse/regulator-core-fix-false-positive-in-regulator_late_.patch
patches.suse/msft-hv-2514-PCI-hv-Fix-NUMA-node-assignment-when-kernel-boots-wi.patch
patches.suse/msft-hv-2515-Drivers-hv-vmbus-Fix-memory-leak-in-vmbus_add_channe.patch
+ patches.suse/msft-hv-2526-Drivers-hv-vmbus-Rework-use-of-DMA_BIT_MASK-64.patch
patches.suse/msft-hv-2517-Drivers-hv-utils-Make-use-of-the-helper-macro-LIST_H.patch
patches.suse/HID-amd_sfh-Add-illuminance-mask-to-limit-ALS-max-va.patch
patches.suse/HID-Add-support-for-UGTABLET-WP5540.patch
@@ -12355,6 +12917,7 @@
patches.suse/mmc-block-fix-read-single-on-recovery-logic.patch
patches.suse/mm-don-t-try-to-NUMA-migrate-COW-pages-that-have-other-uses.patch
patches.suse/libsubcmd-Fix-use-after-free-for-realloc-.-0.patch
+ patches.suse/net-smc-Avoid-overwriting-the-copies-of-clcsock-callback-functions
patches.suse/selftests-netfilter-fix-exit-value-for-nft_concat_ra.patch
patches.suse/selftests-netfilter-disable-rp_filter-on-router.patch
patches.suse/brcmfmac-firmware-Fix-crash-in-brcm_alt_fw_path.patch
@@ -12478,7 +13041,14 @@
patches.suse/io_uring-add-a-schedule-point-in-io_add_buffers.patch
patches.suse/nvme-don-t-return-an-error-from-nvme_configure_metad.patch
patches.suse/nvme-also-mark-passthrough-only-namespaces-ready-in-.patch
+ patches.suse/bpf-Do-not-try-bpf_msg_push_data-with-len-0.patch
+ patches.suse/bpf-Fix-crash-due-to-incorrect-copy_map_value.patch
+ patches.suse/selftests-bpf-Add-test-for-bpf_timer-overwriting-cra.patch
+ patches.suse/bpf-Emit-bpf_timer-in-vmlinux-BTF.patch
+ patches.suse/bpf-Fix-a-bpf_timer-initialization-issue.patch
+ patches.suse/selftests-bpf-Check-bpf_msg_push_data-return-value.patch
patches.suse/bpf-Fix-crash-due-to-out-of-bounds-access-into-reg2b.patch
+ patches.suse/bpf-Add-schedule-points-in-batch-ops.patch
patches.suse/drivers-hamradio-6pack-fix-UAF-bug-caused-by-mod_tim.patch
patches.suse/sr9700-sanity-check-for-packet-length.patch
patches.suse/net-ll_temac-check-the-return-value-of-devm_kmalloc.patch
@@ -12674,6 +13244,8 @@
patches.suse/batman-adv-Request-iflink-once-in-batadv_get_real_ne.patch
patches.suse/batman-adv-Don-t-expect-inter-netns-unique-iflink-in.patch
patches.suse/net-ipa-add-an-interconnect-dependency.patch
+ patches.suse/bpf-sockmap-Do-not-ignore-orig_len-parameter.patch
+ patches.suse/tcp-make-tcp_read_sock-more-robust.patch
patches.suse/net-smc-fix-unexpected-SMC_CLC_DECL_ERR_REGRMB-error-generated-by-client
patches.suse/net-smc-fix-unexpected-SMC_CLC_DECL_ERR_REGRMB-error-cause-by-server
patches.suse/sfc-extend-the-locking-on-mcdi-seqno.patch
@@ -13604,6 +14176,18 @@
patches.suse/TOMOYO-fix-__setup-handlers-return-values.patch
patches.suse/KVM-x86-mmu-Move-invalid-check-out-of-kvm_tdp_mmu_ge.patch
patches.suse/KVM-x86-mmu-Zap-_all_-roots-when-unmapping-gfn-range.patch
+ patches.suse/s390-uaccess-Add-copy_from-to_user_key-functions
+ patches.suse/KVM-s390-Honor-storage-keys-when-accessing-guest-memory
+ patches.suse/KVM-s390-handle_tprot-Honor-storage-keys
+ patches.suse/KVM-s390-selftests-Test-TEST-PROTECTION-emulation
+ patches.suse/KVM-s390-Add-optional-storage-key-checking-to-MEMOP-IOCTL
+ patches.suse/KVM-s390-Add-vm-IOCTL-for-key-checked-guest-absolute-memory-access
+ patches.suse/KVM-s390-Rename-existing-vcpu-memop-functions
+ patches.suse/KVM-s390-Add-capability-for-storage-key-extension-of-MEM_OP-IOCTL
+ patches.suse/KVM-s390-Update-api-documentation-for-memop-ioctl
+ patches.suse/selftests-kvm-Check-whether-SIDA-memop-fails-for-normal-guests
+ patches.suse/KVM-s390-Clarify-key-argument-for-MEM_OP-in-api-docs
+ patches.suse/KVM-s390-Add-missing-vm-MEM_OP-size-check
patches.suse/KVM-x86-hyper-v-Drop-redundant-ex-parameter-from-kvm-ipi.patch
patches.suse/KVM-x86-hyper-v-Drop-redundant-ex-parameter-from-kvm.patch
patches.suse/KVM-x86-hyper-v-Fix-the-maximum-number-of-sparse-ban.patch
@@ -13619,6 +14203,11 @@
patches.suse/KVM-x86-mmu-WARN-if-old-_or_-new-SPTE-is-REMOVED-in-.patch
patches.suse/kvm-svm-allow-avic-support-on-system-w-physical-apic-id-255
patches.suse/KVM-s390x-fix-SCK-locking
+ patches.suse/KVM-s390-selftests-Split-memop-tests
+ patches.suse/KVM-s390-selftests-Add-macro-as-abstraction-for-MEM_OP
+ patches.suse/KVM-s390-selftests-Add-named-stages-for-memop-test
+ patches.suse/KVM-s390-selftests-Add-more-copy-memop-tests
+ patches.suse/KVM-s390-selftests-Add-error-memop-tests
patches.suse/msft-hv-2519-Drivers-hv-vmbus-Use-struct_size-helper-in-kmalloc.patch
patches.suse/msft-hv-2520-Drivers-hv-Rename-alloced-to-allocated.patch
patches.suse/msft-hv-2521-Drivers-hv-Compare-cpumasks-and-not-their-weights-in.patch
@@ -13633,6 +14222,7 @@
patches.suse/net-mlx5-Expose-APIs-to-get-put-the-mlx5-core-device.patch
patches.suse/net-mlx5-Introduce-migration-bits-and-structures.patch
patches.suse/net-mlx5-Add-migration-commands-definitions.patch
+ patches.suse/PCI-IOV-Fix-wrong-kernel-doc-identifier.patch
patches.suse/crypto-hisilicon-qm-Move-the-QM-header-to-include-li.patch
patches.suse/crypto-hisilicon-qm-Move-few-definitions-to-common-h.patch
patches.suse/hisi_acc_qm-Move-VF-PCI-device-IDs-to-common-header.patch
@@ -13703,6 +14293,7 @@
patches.suse/Bluetooth-btusb-Whitespace-fixes-for-btusb_setup_csr.patch
patches.suse/Bluetooth-hci_serdev-call-init_rwsem-before-p-open.patch
patches.suse/selftests-net-timestamping-Fix-bind_phc-check.patch
+ patches.suse/net-smc-Send-directly-when-TCP_CORK-is-cleared
patches.suse/msft-hv-2516-net-mana-Add-counter-for-packet-dropped-by-XDP.patch
patches.suse/msft-hv-2517-net-mana-Add-counter-for-XDP_TX.patch
patches.suse/msft-hv-2518-net-mana-Reuse-XDP-dropped-page.patch
@@ -13768,6 +14359,7 @@
patches.suse/octeontx2-pf-Add-TC-feature-for-VFs.patch
patches.suse/ath10k-fix-memory-overwrite-of-the-WoWLAN-wakeup-pac.patch
patches.suse/ath5k-fix-OOB-in-ath5k_eeprom_read_pcal_info_5111.patch
+ patches.suse/ath10k-abstract-htt_rx_desc-structure.patch
patches.suse/ath9k_htc-fix-uninit-value-bugs.patch
patches.suse/rtw88-rtw8821c-enable-rfe-6-devices.patch
patches.suse/ray_cs-Check-ioremap-return-value.patch
@@ -13776,6 +14368,7 @@
patches.suse/brcmfmac-pcie-Declare-missing-firmware-files-in-pcie.patch
patches.suse/brcmfmac-pcie-Replace-brcmf_pcie_copy_mem_todev-with.patch
patches.suse/brcmfmac-pcie-Fix-crashes-due-to-early-IRQs.patch
+ patches.suse/mac80211-limit-bandwidth-in-HE-capabilities.patch
patches.suse/cfg80211-mac80211-assume-CHECKSUM_COMPLETE-includes-.patch
patches.suse/cfg80211-don-t-add-non-transmitted-BSS-to-6GHz-scann.patch
patches.suse/mt76-connac-fix-sta_rec_wtbl-tag-len.patch
@@ -13856,6 +14449,7 @@
patches.suse/RDMA-mlx5-Use-new-command-interface-API.patch
patches.suse/net-mlx5-Add-reset_state-field-to-MFRL-register.patch
patches.suse/net-mlx5-Add-clarification-on-sync-reset-failure.patch
+ patches.suse/net-smc-send-directly-on-setting-TCP_NODELAY
patches.suse/sfc-default-config-to-1-channel-core-in-local-NUMA-n.patch
patches.suse/sfc-set-affinity-hints-in-local-NUMA-node-only.patch
patches.suse/tuntap-add-sanity-checks-about-msg_controllen-in-sen.patch
@@ -14031,6 +14625,7 @@
patches.suse/ath11k-mhi-use-mhi_sync_power_up.patch
patches.suse/wcn36xx-Differentiate-wcn3660-from-wcn3620.patch
patches.suse/carl9170-fix-missing-bit-wise-or-operator-for-tx_par.patch
+ patches.suse/ath10k-fix-pointer-arithmetic-error-in-trace-call.patch
patches.suse/iwlwifi-bump-FW-API-to-71-for-AX-devices.patch
patches.suse/iwlwifi-mvm-add-a-flag-to-reduce-power-command.patch
patches.suse/iwlwifi-Configure-FW-debug-preset-via-module-param.patch
@@ -14241,6 +14836,7 @@
patches.suse/drm-i915-display-Fix-HPD-short-pulse-handling-for-eD.patch
patches.suse/drm-amd-display-Add-pstate-verification-and-recovery.patch
patches.suse/xfs-use-setattr_copy-to-set-vfs-inode-attributes.patch
+ patches.suse/xfs-reserve-quota-for-dir-expansion-when-linking-unl.patch
patches.suse/RDMA-core-Set-MR-type-in-ib_reg_user_mr.patch
patches.suse/RDMA-mlx5-Delete-get_num_static_uars-function.patch
patches.suse/RDMA-mlx5-Delete-useless-module.h-include.patch
@@ -14325,9 +14921,28 @@
patches.suse/scsi-bnx2fc-Make-use-of-the-helper-macro-kthread_run.patch
patches.suse/scsi-qedi-Remove-redundant-flush_workqueue-calls.patch
patches.suse/scsi-lpfc-Remove-redundant-flush_workqueue-call.patch
+ patches.suse/scsi-mpt3sas-Convert-to-flexible-arrays.patch
+ patches.suse/scsi-smartpqi-Fix-rmmod-stack-trace.patch
patches.suse/scsi-smartpqi-Add-PCI-IDs
+ patches.suse/scsi-smartpqi-Enable-SATA-NCQ-priority-in-sysfs.patch
+ patches.suse/scsi-smartpqi-Eliminate-drive-spin-down-on-warm-boot.patch
+ patches.suse/scsi-smartpqi-Quickly-propagate-path-failures-to-SCS.patch
+ patches.suse/scsi-smartpqi-Fix-a-name-typo-and-cleanup-code.patch
+ patches.suse/scsi-smartpqi-Fix-a-typo-in-func-pqi_aio_submit_io.patch
+ patches.suse/scsi-smartpqi-Resolve-delay-issue-with-PQI_HZ-value.patch
+ patches.suse/scsi-smartpqi-Avoid-drive-spin-down-during-suspend.patch
+ patches.suse/scsi-smartpqi-Update-volume-size-after-expansion.patch
+ patches.suse/scsi-smartpqi-Fix-kdump-issue-when-controller-is-loc.patch
+ patches.suse/scsi-smartpqi-Speed-up-RAID-10-sequential-reads.patch
+ patches.suse/scsi-smartpqi-Expose-SAS-address-for-SATA-drives.patch
+ patches.suse/scsi-smartpqi-Fix-NUMA-node-not-updated-during-init.patch
+ patches.suse/scsi-smartpqi-Fix-BUILD_BUG_ON-statements.patch
+ patches.suse/scsi-smartpqi-Fix-hibernate-and-suspend.patch
+ patches.suse/scsi-smartpqi-Fix-lsscsi-t-SAS-addresses.patch
+ patches.suse/scsi-smartpqi-Update-version-to-2.1.14-035.patch
patches.suse/scsi-qla2xxx-Add-qla2x00_async_done-for-async-routin.patch
patches.suse/scsi-qla2xxx-Remove-unused-qla_sess_op_cmd_list-from.patch
+ patches.suse/scsi-smartpqi-Fix-unused-variable-pqi_pm_ops-for-cla.patch
patches.suse/scsi-mpi3mr-Fix-deadlock-while-canceling-the-fw-event.patch
patches.suse/scsi-mpi3mr-Fix-printing-of-pending-I-O-count.patch
patches.suse/scsi-mpi3mr-Update-MPI3-headers.patch
@@ -14343,7 +14958,9 @@
patches.suse/scsi-iscsi-Stop-using-the-SCSI-pointer.patch
patches.suse/scsi-bnx2fc-Stop-using-the-SCSI-pointer.patch
patches.suse/scsi-qedf-Stop-using-the-SCSI-pointer.patch
+ patches.suse/scsi-megasas-Stop-using-the-SCSI-pointer.patch
patches.suse/scsi-qla2xxx-Stop-using-the-SCSI-pointer.patch
+ patches.suse/scsi-smartpqi-Stop-using-the-SCSI-pointer.patch
patches.suse/scsi-qla2xxx-Use-named-initializers-for-port_-d-stat.patch
patches.suse/scsi-qla2xxx-Use-named-initializers-for-q_dev_state.patch
patches.suse/scsi-mpi3mr-Fix-flushing-WQ_MEM_RECLAIM-events-warning.patch
@@ -14368,11 +14985,13 @@
patches.suse/scsi-lpfc-Use-rport-as-argument-for-lpfc_chk_tgt_map.patch
patches.suse/scsi-core-sd-Add-silence_suspend-flag-to-suppress-some-PM-messages.patch
patches.suse/scsi-ufs-Fix-runtime-PM-messages-never-ending-cycle.patch
+ patches.suse/scsi-mpt3sas-Remove-scsi_dma_map-error-messages.patch
patches.suse/scsi-mpt3sas-Fix-incorrect-4GB-boundary-check.patch
patches.suse/scsi-ufs-core-scsi_get_lba-error-fix.patch
patches.suse/scsi-lpfc-Remove-failing-soft_wwn-support.patch
patches.suse/scsi-target-Add-iscsi-cpus_allowed_list-in-configfs.patch
patches.suse/scsi-aacraid-Clean-up-some-inconsistent-indenting.patch
+ patches.suse/scsi-megasas-Clean-up-some-inconsistent-indenting.patch
patches.suse/scsi-iscsi-Add-helper-functions-to-manage-iscsi_cls_conn.patch
patches.suse/scsi-libiscsi-Add-iscsi_cls_conn-to-sysfs-after-initialization.patch
patches.suse/scsi-libiscsi-Teardown-iscsi_cls_conn-gracefully.patch
@@ -14938,6 +15557,7 @@
patches.suse/xfs-drop-async-cache-flushes-from-CIL-commits.patch
patches.suse/watch_queue-Free-the-page-array-when-watch_queue-is-.patch
patches.suse/platform-chrome-cros_ec_typec-Check-for-EC-device.patch
+ patches.suse/platform-chrome-Split-trace-include-file.patch
patches.suse/platform-chrome-cros_ec_debugfs-detach-log-reader-wq.patch
patches.suse/msft-hv-2562-PCI-hv-Remove-unused-hv_set_msi_entry_from_desc.patch
patches.suse/KVM-x86-Check-lapic_in_kernel-before-attempting-to-s.patch
@@ -15072,7 +15692,10 @@
patches.suse/scsi-virtio-scsi-Eliminate-anonymous-module_init-module_exit
patches.suse/scsi-zorro7xx-Fix-a-resource-leak-in-zorro7xx_remove_one
patches.suse/scsi-bnx2fc-Fix-spelling-mistake-mis-match-mismatch.patch
+ patches.suse/scsi-mpt3sas-Fix-mpt3sas_check_same_4gb_region-kdoc-.patch
patches.suse/scsi-ufs-ufs-pci-Add-support-for-Intel-MTL.patch
+ patches.suse/scsi-mpt3sas-Fail-reset-operation-if-config-request-.patch
+ patches.suse/scsi-megaraid_sas-Target-with-invalid-LUN-ID-is-dele.patch
patches.suse/tools-testing-nvdimm-Fix-security_init-symbol-collis.patch
patches.suse/sched-core-Fix-forceidle-balancing.patch
patches.suse/sched-Teach-the-forced-newidle-balancer-about-CPU-af.patch
@@ -15082,6 +15705,7 @@
patches.suse/irqchip-gic-v4-Wait-for-GICR_VPENDBASER.Dirty-to-cle.patch
patches.suse/irqchip-gic-v3-Fix-GICR_CTLR.RWP-polling.patch
patches.suse/irqchip-gic-gic-v3-Prevent-GSI-to-SGI-translations.patch
+ patches.suse/powerpc-64-Fix-build-failure-with-allyesconfig-in-bo.patch
patches.suse/powerpc-numa-Handle-partially-initialized-numa-nodes.patch
patches.suse/platform-x86-samsung-laptop-Fix-an-unsigned-comparis.patch
patches.suse/media-rockchip-rga-do-proper-error-checking-in-probe.patch
@@ -15172,6 +15796,7 @@
patches.suse/spi-spi-mtk-nor-initialize-spi-controller-after-resu.patch
patches.suse/spi-cadence-quadspi-fix-incorrect-supports_op-return.patch
patches.suse/spi-atmel-quadspi-Fix-the-buswidth-adjustment-betwee.patch
+ patches.suse/net-smc-Fix-sock-leak-when-release-after-smc_shutdown
patches.suse/ice-xsk-check-if-Rx-ring-was-filled-up-to-the-end.patch
patches.suse/ice-allow-creating-VFs-for-CONFIG_NET_SWITCHDEV.patch
patches.suse/ice-fix-crash-in-switchdev-mode.patch
@@ -15299,6 +15924,8 @@
patches.suse/net-hns3-add-validity-check-for-message-data-length.patch
patches.suse/net-hns3-add-return-value-for-mailbox-handling-in-PF.patch
patches.suse/net-smc-sync-err-code-when-tcp-connection-was-refused
+ patches.suse/net-smc-Only-save-the-original-clcsock-callback-functions
+ patches.suse/net-smc-Fix-slab-out-of-bounds-issue-in-fallback
patches.suse/net-bcmgenet-hide-status-block-before-TX-timestampin.patch
patches.suse/net-phy-marvell10g-fix-return-value-on-error.patch
patches.suse/net-dsa-mv88e6xxx-Fix-port_hidden_wait-to-account-fo.patch
@@ -15465,6 +16092,7 @@
patches.suse/tcp-drop-the-hash_32-part-from-the-index-calculation.patch
patches.suse/selftests-ocelot-tc_flower_chains-specify-conform-ex.patch
patches.suse/NFC-netlink-fix-sleep-in-atomic-bug-when-firmware-do.patch
+ patches.suse/KVM-s390-Fix-lockdep-issue-in-vm-memop
patches.suse/KVM-s390-vsie-gmap-reduce-gmap_rmap-overhead
patches.suse/drm-amdgpu-do-not-use-passthrough-mode-in-Xen-dom0.patch
patches.suse/drm-amd-display-Avoid-reading-audio-pattern-past-AUD.patch
@@ -15671,6 +16299,7 @@
patches.suse/efi-Add-missing-prototype-for-efi_capsule_setup_info.patch
patches.suse/efi-libstub-declare-DXE-services-table.patch
patches.suse/efi-libstub-ensure-allocated-memory-to-be-executable.patch
+ patches.suse/efi-x86-Set-the-NX-compatibility-flag-in-the-PE-head.patch
patches.suse/efi-libstub-pass-image-handle-to-handle_kernel_image.patch
patches.suse/efi-arm64-libstub-run-image-in-place-if-randomized-b.patch
patches.suse/efi-stub-prefer-mirrored-memory-for-randomized-alloc.patch
@@ -16205,6 +16834,7 @@
patches.suse/sfc-siena-Reinstate-SRIOV-init-fini-function-calls.patch
patches.suse/usbnet-Run-unregister_netdev-before-unbind-again.patch
patches.suse/usbnet-smsc95xx-Don-t-clear-read-only-PHY-interrupt.patch
+ patches.suse/usbnet-smsc95xx-Don-t-reset-PHY-behind-PHY-driver-s-.patch
patches.suse/usbnet-smsc95xx-Avoid-link-settings-race-on-interrup.patch
patches.suse/bnxt_en-Update-firmware-interface-to-1.10.2.95.patch
patches.suse/bnxt_en-Configure-ptp-filters-during-bnxt-open.patch
@@ -16653,6 +17283,7 @@
patches.suse/ASoC-SOF-ipc3-dtrace-Move-dtrace-related-variables-l.patch
patches.suse/ALSA-usb-audio-Configure-sync-endpoints-before-data.patch
patches.suse/ALSA-hda-realtek-Add-new-type-for-ALC245.patch
+ patches.suse/scsi-megaraid_sas-Remove-unnecessary-memset.patch
patches.suse/scsi-lpfc-Tweak-message-log-categories-for-ELS-FDMI-.patch
patches.suse/scsi-lpfc-Move-cfg_log_verbose-check-before-calling-.patch
patches.suse/scsi-lpfc-Fix-diagnostic-fw-logging-after-a-function.patch
@@ -16681,6 +17312,11 @@
patches.suse/scsi-lpfc-Copyright-updates-for-14.2.0.2-patches.patch
patches.suse/scsi-pmcraid-Remove-unneeded-semicolon.patch
patches.suse/scsi-qla2xxx-Remove-free_sg-command-flag.patch
+ patches.suse/scsi-mpt3sas-Fix-_ctl_set_task_mid-TaskMID-check.patch
+ patches.suse/scsi-mpt3sas-Fix-writel-use.patch
+ patches.suse/scsi-mpt3sas-Fix-ioc-base_readl-use.patch
+ patches.suse/scsi-mpt3sas-Fix-event-callback-log_code-value-handl.patch
+ patches.suse/scsi-mpt3sas-Fix-adapter-replyPostRegisterIndex-decl.patch
patches.suse/scsi-iscsi-Fix-harmless-double-shift-bug.patch
patches.suse/scsi-ufs-qcom-Fix-ufs_qcom_resume
patches.suse/scsi-aacraid-Fix-undefined-behavior-due-to-shift-overflowing-the-constant.patch
@@ -16728,8 +17364,11 @@
patches.suse/scsi-bnx2fc-Avoid-using-get_cpu-in-bnx2fc_cmd_alloc.patch
patches.suse/scsi-mpi3mr-Fix-a-NULL-vs-IS_ERR-bug-in-mpi3mr_bsg_init.patch
patches.suse/scsi-ipr-Use-kobj_to_dev.patch
+ patches.suse/scsi-mpt3sas-Fix-junk-chars-displayed-while-printing.patch
+ patches.suse/scsi-mpt3sas-Update-driver-version-to-42.100.00.00.patch
patches.suse/scsi-hisi_sas-Fix-rescan-after-deleting-a-disk
patches.suse/scsi-mpi3mr-Return-error-if-dma_alloc_coherent-fails.patch
+ patches.suse/scsi-megaraid_sas-Remove-redundant-memset-statement.patch
patches.suse/scsi-mpi3mr-Add-shost-related-sysfs-attributes.patch
patches.suse/scsi-mpi3mr-Add-target-device-related-sysfs-attributes.patch
patches.suse/scsi-fnic-Replace-DMA-mask-of-64-bits-with-47-bits
@@ -16824,6 +17463,9 @@
patches.suse/KVM-x86-Use-__try_cmpxchg_user-to-update-guest-PTE-A.patch
patches.suse/KVM-x86-Use-__try_cmpxchg_user-to-emulate-atomic-acc.patch
patches.suse/KVM-x86-avoid-loading-a-vCPU-after-.vm_destroy-was-c.patch
+ patches.suse/drivers-s390-char-Add-Ultravisor-io-device
+ patches.suse/KVM-s390-Don-t-indicate-suppression-on-dirtying-failing-memop
+ patches.suse/KVM-s390-selftest-Test-suppression-indication-on-key-prot-exception
patches.suse/KVM-SVM-Use-kzalloc-for-sev-ioctl-interfaces-to-prev.patch
patches.suse/KVM-x86-avoid-calling-x86-emulator-without-a-decoded-instruction
patches.suse/KVM-LAPIC-Drop-pending-LAPIC-timer-injection-when-ca.patch
@@ -16993,6 +17635,7 @@
patches.suse/msft-hv-2582-hv_sock-Copy-packets-sent-by-Hyper-V-out-of-the-ring.patch
patches.suse/msft-hv-2583-hv_sock-Add-validation-for-untrusted-Hyper-V-values.patch
patches.suse/msft-hv-2584-Drivers-hv-vmbus-Accept-hv_sock-offers-in-isolated-g.patch
+ patches.suse/msft-hv-2585-Drivers-hv-vmbus-Refactor-the-ring-buffer-iterator-f.patch
patches.suse/msft-hv-2586-PCI-hv-Fix-hv_arch_irq_unmask-for-multi-MSI.patch
patches.suse/msft-hv-2588-PCI-hv-Do-not-set-PCI_COMMAND_MEMORY-to-reduce-VM-bo.patch
patches.suse/msft-hv-2597-x86-hyperv-Disable-hardlockup-detector-by-default-in.patch
@@ -17001,24 +17644,29 @@
patches.suse/msft-hv-2604-PCI-hv-Add-validation-for-untrusted-Hyper-V-values.patch
patches.suse/pci-hv-fix-synchronization-between-channel-callback-hv_pci_bus_exit.patch
patches.suse/msft-hv-2611-Drivers-hv-vmbus-fix-typo-in-comment.patch
+ patches.suse/msft-hv-2613-hv_balloon-Fix-balloon_probe-and-balloon_remove-erro.patch
patches.suse/Input-sparcspkr-fix-refcount-leak-in-bbc_beep_probe.patch
patches.suse/Input-gpio-keys-cancel-delayed-work-only-in-case-of-.patch
patches.suse/Input-stmfts-do-not-leave-device-disabled-in-stmfts_.patch
patches.suse/tracing-Fix-potential-double-free-in-create_var_ref.patch
patches.suse/tracing-Fix-return-value-of-trace_pid_write.patch
patches.suse/ftrace-Clean-up-hash-direct_functions-on-register-failures.patch
+ patches.suse/dmaengine-idxd-don-t-load-pasid-config-until-needed.patch
patches.suse/dmaengine-tegra-Add-tegra-gpcdma-driver.patch
patches.suse/dmaengine-idxd-update-IAA-definitions-for-user-heade.patch
patches.suse/dmaengine-idxd-set-DMA_INTERRUPT-cap-bit.patch
patches.suse/dmaengine-tegra-Remove-unused-including-linux-versio.patch
patches.suse/dmaengine-tegra-Use-platform_get_irq-to-get-IRQ-reso.patch
patches.suse/dmaengine-idxd-Fix-the-error-handling-path-in-idxd_c.patch
+ patches.suse/dmaengine-idxd-Separate-user-and-kernel-pasid-enabli.patch
patches.suse/dmaengine-zynqmp_dma-In-struct-zynqmp_dma_chan-fix-d.patch
patches.suse/dmaengine-idxd-add-missing-callback-function-to-supp.patch
+ patches.suse/dmaengine-tegra-Fix-build-error-without-IOMMU_API.patch
patches.suse/dmaengine-stm32-mdma-remove-GISR1-register.patch
patches.suse/dmaengine-stm32-mdma-fix-chan-initialization-in-stm3.patch
patches.suse/dmaengine-tegra-Fix-uninitialized-variable-usage.patch
patches.suse/dmaengine-tegra-Remove-unused-switch-case.patch
+ patches.suse/MIPS-Loongson-Use-hwmon_device_register_with_groups-.patch
patches.suse/ACPI-DPTF-Support-Meteor-Lake.patch
patches.suse/ACPI-glue-Rearrange-find_child_checks.patch
patches.suse/ACPI-processor-idle-Expose-max_cstate-nocst-bm_check.patch
@@ -17081,6 +17729,7 @@
patches.suse/rtc-mt6397-check-return-value-after-calling-platform.patch
patches.suse/rtc-ftrtc010-Fix-error-handling-in-ftrtc010_rtc_prob.patch
patches.suse/rtc-mxc-Silence-a-clang-warning.patch
+ patches.suse/xfs-fix-xfs_ifree-error-handling-to-not-leak-perag-r.patch
patches.suse/assoc_array-Fix-BUG_ON-during-garbage-collect.patch
patches.suse/i2c-ismt-prevent-memory-corruption-in-ismt_access.patch
patches.suse/net-smc-set-ini-smcrv2.ib_dev_v2-to-NULL-if-SMC-Rv2-is-unavailable
@@ -17234,6 +17883,7 @@
patches.suse/USB-gadget-Fix-mistakes-in-UDC-core-kerneldoc.patch
patches.suse/USB-gadget-Add-a-new-bus-for-gadgets.patch
patches.suse/usb-dwc2-gadget-don-t-reset-gadget-s-driver-bus.patch
+ patches.suse/USB-gadget-Fix-return-of-EBUSY.patch
patches.suse/usb-ehci-omap-drop-unused-ehci_read-function.patch
patches.suse/usb-dwc3-host-Stop-setting-the-ACPI-companion.patch
patches.suse/usb-dwc3-gadget-Prevent-repeat-pullup.patch
@@ -17359,6 +18009,7 @@
patches.suse/scsi-lpfc-Add-support-for-VMID-tagging-of-NVMe-I-Os.patch
patches.suse/msft-hv-2610-scsi-storvsc-Fix-typo-in-comment.patch
patches.suse/scsi-qedf-Fix-typo-in-comment.patch
+ patches.suse/scsi-smartpqi-Fix-typo-in-comment.patch
patches.suse/scsi-pmcraid-Fix-typo-in-comment.patch
patches.suse/scsi-mpi3mr-Rework-mrioc-bsg_device-model-to-fix-warnings.patch
patches.suse/scsi-sd-Fix-potential-NULL-pointer-dereference.patch
@@ -17458,6 +18109,7 @@
patches.suse/scsi-lpfc-Add-more-logging-of-cmd-and-cqe-informatio.patch
patches.suse/scsi-lpfc-Allow-reduced-polling-rate-for-nvme_admin_.patch
patches.suse/scsi-lpfc-Update-lpfc-version-to-14.2.0.4.patch
+ patches.suse/scsi-mpt3sas-Fix-out-of-bounds-compiler-warning.patch
patches.suse/scsi-ipr-Fix-missing-incorrect-resource-cleanup-in-e.patch
patches.suse/scsi-pmcraid-Fix-missing-resource-cleanup-in-error-case.patch
patches.suse/gpio-dwapb-Don-t-print-error-on-EPROBE_DEFER.patch
@@ -17506,6 +18158,7 @@
patches.suse/usb-gadget-lpc32xx_udc-Fix-refcount-leak-in-lpc32xx_.patch
patches.suse/usb-gadget-u_ether-fix-regression-in-setting-fixed-M.patch
patches.suse/usb-cdnsp-Fixed-setting-last_trb-incorrectly.patch
+ patches.suse/xhci-Fix-null-pointer-dereference-in-resume-if-xhci-.patch
patches.suse/usb-gadget-f_fs-change-ep-status-safe-in-ffs_epfile_.patch
patches.suse/usb-gadget-f_fs-change-ep-ep-safe-in-ffs_epfile_io.patch
patches.suse/tty-n_gsm-Debug-output-allocation-must-use-GFP_ATOMI.patch
@@ -17516,6 +18169,7 @@
patches.suse/0019-block-Fix-handling-of-offline-queues-in-blk_mq_alloc.patch
patches.suse/msft-hv-2618-Drivers-hv-Fix-syntax-errors-in-comments.patch
patches.suse/clocksource-hyper-v-unexport-__init-annotated-hv_ini.patch
+ patches.suse/msft-hv-2620-HID-hyperv-Correctly-access-fields-declared-as-__le1.patch
patches.suse/msft-hv-2621-Drivers-hv-vmbus-Release-cpu-lock-in-error-case.patch
patches.suse/x86-hyper-v-add-sev-negotiate-protocol-support-in-isolation-vm.patch
patches.suse/arm64-ftrace-fix-branch-range-checks.patch
@@ -17549,6 +18203,7 @@
patches.suse/mm-slub-add-missing-TID-updates-on-slab-deactivation.patch
patches.suse/scsi-scsi_debug-Fix-zone-transition-to-full-condition.patch
patches.suse/scsi-iscsi-Exclude-zero-from-the-endpoint-ID-range.patch
+ patches.suse/msft-hv-2625-scsi-storvsc-Correct-reporting-of-Hyper-V-I-O-size-l.patch
patches.suse/scsi-ibmvfc-Allocate-free-queue-resource-only-during.patch
patches.suse/scsi-ibmvfc-Store-vhost-pointer-during-subcrq-alloca.patch
patches.suse/efi-sysfb_efi-remove-unnecessary-asm-efi.h-include.patch
@@ -17813,6 +18468,7 @@
patches.suse/iommu-vt-d-Fix-PCI-bus-rescan-device-hot-add
patches.suse/iommu-vt-d-Fix-RID2PASID-setup-teardown-failure
patches.suse/memregion-Fix-memregion_free-fallback-definition.patch
+ patches.suse/PM-runtime-Redefine-pm_runtime_release_supplier.patch
patches.suse/ACPI-CPPC-Only-probe-for-_CPC-if-CPPC-v2-is-acked.patch
patches.suse/ACPI-CPPC-Don-t-require-_OSC-if-X86_FEATURE_CPPC-is-.patch
patches.suse/powerpc-powernv-delay-rng-platform-device-creation-u.patch
@@ -17829,6 +18485,7 @@
patches.suse/dmaengine-qcom-bam_dma-fix-runtime-PM-underflow.patch
patches.suse/dmaengine-idxd-force-wq-context-cleanup-on-device-di.patch
patches.suse/dmaengine-at_xdma-handle-errors-of-at_xdmac_alloc_de.patch
+ patches.suse/dmaengine-idxd-Only-call-idxd_enable_system_pasid-if.patch
patches.suse/dmaengine-pl330-Fix-lockdep-warning-about-non-static.patch
patches.suse/dmaengine-lgm-Fix-an-error-handling-path-in-intel_ld.patch
patches.suse/dt-bindings-dma-allwinner-sun50i-a64-dma-Fix-min-max.patch
@@ -18306,6 +18963,7 @@
patches.suse/ixgbe-Fix-typos-in-comments.patch
patches.suse/drivers-net-ethernet-intel-fix-typos-in-comments.patch
patches.suse/ethernet-Remove-vf-rate-limit-check-for-drivers.patch
+ patches.suse/net-smsc95xx-add-support-for-Microchip-EVB-LAN8670-U.patch
patches.suse/net-mlx5-Introduce-header-modify-pattern-ICM-propert.patch
patches.suse/net-mlx5-Manage-ICM-of-type-modify-header-pattern.patch
patches.suse/RDMA-mlx5-Support-handling-of-modify-header-pattern-.patch
@@ -18621,6 +19279,7 @@
patches.suse/drm-bridge-tc358767-Make-sure-Refclk-clock-are-enabl.patch
patches.suse/drm-st7735r-Fix-module-autoloading-for-Okaya-RH12812.patch
patches.suse/drm-mipi-dbi-align-max_chunk-to-2-in-spi_transfer.patch
+ patches.suse/msft-hv-2614-drm-hyperv-Removing-the-restruction-of-VRAM-allocati.patch
patches.suse/drm-nouveau-fix-another-off-by-one-in-nvbios_addr.patch
patches.suse/drm-bridge-lt9611uxc-Cancel-only-driver-s-work.patch
patches.suse/virtio-gpu-fix-a-missing-check-to-avoid-NULL-derefer.patch
@@ -18628,6 +19287,7 @@
patches.suse/drm-adv7511-override-i2c-address-of-cec-before-acces.patch
patches.suse/0009-fbcon-Fix-accelerated-fbdev-scrolling-while-logo-is-.patch
patches.suse/fbcon-Fix-boundary-checks-for-fbcon-vc-n1-n2-paramet.patch
+ patches.suse/msft-hv-2626-drm-hyperv-drm-Include-framebuffer-and-EDID-headers.patch
patches.suse/drm-bridge-adv7511-Add-check-for-mipi_dsi_driver_reg.patch
patches.suse/drm-mcde-Fix-refcount-leak-in-mcde_dsi_bind.patch
patches.suse/drm-doc-Fix-comment-typo.patch
@@ -18817,6 +19477,7 @@
patches.suse/usb-typec-Add-support-for-retimers.patch
patches.suse/usb-typec-Add-retimer-handle-to-port.patch
patches.suse/USB-serial-fix-tty-port-initialized-comments.patch
+ patches.suse/usb-xhci_plat_remove-avoid-NULL-dereference.patch
patches.suse/USB-HCD-Fix-URB-giveback-issue-in-tasklet-function.patch
patches.suse/usb-core-fix-repeated-words-in-comments.patch
patches.suse/usb-typec-tcpm-fix-repeated-words-in-comments.patch
@@ -18971,9 +19632,13 @@
patches.suse/scsi-iscsi-Fix-session-removal-on-shutdown.patch
patches.suse/scsi-iscsi-Rename-iscsi_conn_queue_work.patch
patches.suse/scsi-iscsi-Remove-iscsi_get_task-back_lock-requirement.patch
+ patches.suse/scsi-mpt3sas-Fix-typo-in-comment.patch
+ patches.suse/scsi-mpt3sas-Fix-whitespace-and-spelling-mistake.patch
+ patches.suse/scsi-mpt3sas-Remove-flush_scheduled_work-call.patch
patches.suse/scsi-qla2xxx-Check-correct-variable-in-qla24xx_async.patch
patches.suse/scsi-mpi3mr-Enable-shared-host-tagset.patch
patches.suse/scsi-mpi3mr-Increase-cmd_per_lun-to-128.patch
+ patches.suse/scsi-megaraid_sas-Clean-up-some-inconsistent-indenti.patch
patches.suse/scsi-lpfc-Fix-uninitialized-cqe-field-in-lpfc_nvme_c.patch
patches.suse/scsi-lpfc-Prevent-buffer-overflow-crashes-in-debugfs.patch
patches.suse/scsi-lpfc-Set-PU-field-when-providing-D_ID-in-XMIT_E.patch
@@ -18987,8 +19652,21 @@
patches.suse/scsi-lpfc-Update-lpfc-version-to-14.2.0.5.patch
patches.suse/scsi-lpfc-Copyright-updates-for-14.2.0.5-patches.patch
patches.suse/scsi-smartpqi-Shorten-drive-visibility-after-removal.patch
+ patches.suse/scsi-smartpqi-Add-controller-fw-version-to-console-l.patch
+ patches.suse/scsi-smartpqi-Add-PCI-IDs-for-ramaxel-controllers.patch
+ patches.suse/scsi-smartpqi-Close-write-read-holes.patch
+ patches.suse/scsi-smartpqi-Add-driver-support-for-multi-LUN-devic.patch
+ patches.suse/scsi-smartpqi-Fix-PCI-control-linkdown-system-hang.patch
+ patches.suse/scsi-smartpqi-Add-PCI-ID-for-Adaptec-SmartHBA-2100-8.patch
+ patches.suse/scsi-smartpqi-Add-PCI-IDs-for-Lenovo-controllers.patch
+ patches.suse/scsi-smartpqi-Stop-logging-spurious-PQI-reset-failur.patch
patches.suse/scsi-smartpqi-Fix-DMA-direction-for-RAID-requests.patch
+ patches.suse/scsi-smartpqi-Fix-RAID-map-race-condition.patch
patches.suse/scsi-smartpqi-Add-module-param-to-disable-managed-ints.patch
+ patches.suse/scsi-smartpqi-Update-deleting-a-LUN-via-sysfs.patch
+ patches.suse/scsi-smartpqi-Add-ctrl-ready-timeout-module-paramete.patch
+ patches.suse/scsi-smartpqi-Update-copyright-to-current-year.patch
+ patches.suse/scsi-smartpqi-Update-version-to-2.1.18-045.patch
patches.suse/scsi-Revert-scsi-qla2xxx-Fix-disk-failure-to-rediscover.patch
patches.suse/scsi-qla2xxx-Fix-incorrect-display-of-max-frame-size.patch
patches.suse/scsi-qla2xxx-Zero-undefined-mailbox-IN-registers.patch
@@ -19004,6 +19682,7 @@
patches.suse/scsi-mpi3mr-Reduce-VD-queue-depth-on-detecting-throttling.patch
patches.suse/scsi-mpi3mr-Unlock-on-error-path.patch
patches.suse/scsi-mpi3mr-Delete-a-stray-tab.patch
+ patches.suse/scsi-megaraid-Remove-the-static-variable-initialisat.patch
patches.suse/RDMA-mlx5-Add-a-umr-recovery-flow.patch
patches.suse/RDMA-rxe-fix-xa_alloc_cycle-error-return-value-check.patch
patches.suse/RDMA-rxe-Remove-useless-pkt-parameters.patch
@@ -19573,6 +20252,8 @@
patches.suse/net-ice-fix-initializing-the-bitmap-in-the-switch-co.patch
patches.suse/iommu-vt-d-avoid-invalid-memory-access-via-node_online-NUMA_NO_N
patches.suse/x86-olpc-fix-logical-not-is-only-applied-to-the-left.patch
+ patches.suse/msft-hv-2630-Drivers-hv-vm_bus-Handle-vmbus-rescind-calls-after-v.patch
+ patches.suse/msft-hv-2634-Drivers-hv-Create-debugfs-file-with-hyper-v-balloon-.patch
patches.suse/exfat-Return-ENAMETOOLONG-consistently-for-oversized.patch
patches.suse/exfat-Define-NLS_NAME_-as-bit-flags-explicitly.patch
patches.suse/exfat-Expand-exfat_err-and-co-directly-to-pr_-macro.patch
@@ -19736,6 +20417,7 @@
patches.suse/nvme-fabrics-parse-nvme-connect-Linux-error-codes.patch
patches.suse/scsi-lpfc-Check-the-return-value-of-alloc_workqueue.patch
patches.suse/scsi-zfcp-Fix-missing-auto-port-scan-and-thus-missing-target-ports
+ patches.suse/scsi-megaraid_sas-Remove-redundant-variable-cmd_type.patch
patches.suse/NTB-ntb_tool-uninitialized-heap-data-in-tool_fn_writ.patch
patches.suse/docs-i2c-i2c-sysfs-fix-hyperlinks.patch
patches.suse/posix-cpu-timers-Cleanup-CPU-timers-before-freeing-t.patch
@@ -19870,6 +20552,8 @@
patches.suse/ixgbe-stop-resetting-SYSTIME-in-ixgbe_ptp_start_cycl.patch
patches.suse/i40e-Fix-incorrect-address-type-for-IPv6-flow-rules.patch
patches.suse/scsi-qla2xxx-Disable-ATIO-interrupt-coalesce-for-qua.patch
+ patches.suse/scsi-megaraid_sas-Fix-double-kfree.patch
+ patches.suse/scsi-megaraid_sas-Remove-unnecessary-kfree.patch
patches.suse/msft-hv-2639-scsi-storvsc-Remove-WQ_MEM_RECLAIM-from-storvsc_erro.patch
patches.suse/0009-blk-mq-fix-io-hung-due-to-missing-commit_rqs.patch
patches.suse/loop-Check-for-overflow-while-configuring-loop.patch
@@ -19946,6 +20630,8 @@
patches.suse/usb-gadget-f_uac2-fix-superspeed-transfer.patch
patches.suse/USB-cdc-acm-Add-Icom-PMR-F3400-support-0c26-0020.patch
patches.suse/thunderbolt-Use-the-actual-buffer-in-tb_async_error.patch
+ patches.suse/thunderbolt-Check-router-generation-before-connectin.patch
+ patches.suse/xhci-Fix-null-pointer-dereference-in-remove-if-xHC-h.patch
patches.suse/usb-add-quirks-for-Lenovo-OneLink-Dock.patch
patches.suse/usb-dwc3-disable-USB-core-PHY-management.patch
patches.suse/usb-typec-Remove-retimers-properly.patch
@@ -20048,6 +20734,7 @@
patches.suse/wifi-iwlegacy-4965-corrected-fix-for-potential-off-b.patch
patches.suse/wifi-mac80211_hwsim-check-length-for-virtio-packets.patch
patches.suse/net-usb-qmi_wwan-add-Quectel-RM520N.patch
+ patches.suse/net-smc-Fix-possible-access-to-freed-memory-in-link-clear
patches.suse/sch_sfb-Also-store-skb-len-before-calling-child-enqu.patch
patches.suse/regulator-core-Clean-up-on-enable-failure.patch
patches.suse/regulator-pfuze100-Fix-the-global-out-of-bounds-acce.patch
@@ -20106,6 +20793,7 @@
patches.suse/NFSv4-Turn-off-open-by-filehandle-and-NFS-re-export-.patch
patches.suse/NFSv4.2-Update-mode-bits-after-ALLOCATE-and-DEALLOCA.patch
patches.suse/Revert-SUNRPC-Remove-unreachable-error-condition.patch
+ patches.suse/msft-hv-2640-drm-hyperv-Fix-an-error-handling-path-in-hyperv_vmbu.patch
patches.suse/msft-hv-2641-tools-hv-Remove-an-extraneous-the.patch
patches.suse/msft-hv-2646-Drivers-hv-remove-duplicate-word-in-a-comment.patch
patches.suse/msft-hv-2647-tools-hv-kvp-remove-unnecessary-void-conversions.patch
@@ -20294,6 +20982,13 @@
patches.suse/ima-fix-blocking-of-security.ima-xattrs-of-unsupport.patch
patches.suse/efi-Correct-Macmini-DMI-match-in-uefi-cert-quirk.patch
patches.suse/selinux-use-grep-E-instead-of-egrep.patch
+ patches.suse/SUNRPC-Fix-svcxdr_init_decode-s-end-of-buffer-calcul.patch
+ patches.suse/SUNRPC-Fix-svcxdr_init_encode-s-buflen-calculation.patch
+ patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv2-Rdir.patch
+ patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv3-Rdir.patch
+ patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv2-R.patch
+ patches.suse/NFSD-Protect-against-send-buffer-overflow-in-NFSv3-R.patch
+ patches.suse/NFSD-Cap-rsize_bop-result-based-on-send-buffer-size.patch
patches.suse/x86-mce-Retrieve-poison-range-from-hardware.patch
patches.suse/ice-set-tx_tstamps-when-creating-new-Tx-rings-via-et.patch
patches.suse/ice-initialize-cached_phctime-when-creating-Rx-rings.patch
@@ -20773,6 +21468,9 @@
patches.suse/IB-rdmavt-Add-__init-__exit-annotations-to-module-in.patch
patches.suse/RDMA-usnic-fix-set-but-not-unused-variable-flags-war.patch
patches.suse/RDMA-rxe-Remove-error-warning-messages-from-packet-r.patch
+ patches.suse/scsi-mpt3sas-Add-support-for-ATTO-ExpressSAS-H12xx-G.patch
+ patches.suse/scsi-mpt3sas-Disable-MPI2_FUNCTION_FW_DOWNLOAD-for-A.patch
+ patches.suse/scsi-megaraid-Remove-redundant-assignment-to-variabl.patch
patches.suse/scsi-lpfc-Fix-unsolicited-FLOGI-receive-handling-dur.patch
patches.suse/scsi-lpfc-Fix-null-ndlp-ptr-dereference-in-abnormal-.patch
patches.suse/scsi-lpfc-Rework-MIB-Rx-Monitor-debug-info-logic.patch
@@ -20784,6 +21482,17 @@
patches.suse/scsi-qla2xxx-remove-unused-qlt_tmr_work.patch
patches.suse/scsi-qla2xxx-always-wait-for-qlt_sess_work_fn-from.patch
patches.suse/scsi-qla2xxx-avoid-flush_scheduled_work-usage.patch
+ patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle.patch
+ patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle-204a29a1.patch
+ patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle-eeb3bab7.patch
+ patches.suse/scsi-megaraid_sas-Replace-one-element-array-with-fle-ee92366a.patch
+ patches.suse/scsi-megaraid_sas-Use-struct_size-in-code-related-to.patch
+ patches.suse/scsi-megaraid_sas-Use-struct_size-in-code-related-to-48658213.patch
+ patches.suse/scsi-mpt3sas-Prevent-error-handler-escalation-when-d.patch
+ patches.suse/scsi-mpt3sas-Don-t-change-DMA-mask-while-reallocatin.patch
+ patches.suse/scsi-mpt3sas-Fix-trace-buffer-registration-failed.patch
+ patches.suse/scsi-mpt3sas-Increase-cmd_per_lun-to-128.patch
+ patches.suse/scsi-mpt3sas-Update-driver-version-to-43.100.00.00.patch
patches.suse/scsi-qla2xxx-log-message-skipping-scsi_scan_host-as.patch
patches.suse/scsi-qla2xxx-revert-scsi-qla2xxx-fix-response-queue-handler.patch
patches.suse/scsi-qla2xxx-fix-response-queue-handler-reading-stale-packets.patch
@@ -20794,6 +21503,8 @@
patches.suse/scsi-qla2xxx-update-version-to-10.02.07.900-k.patch
patches.suse/scsi-lpfc-remove-the-unneeded-result-variable.patch
patches.suse/scsi-lpfc-remove-unneeded-result-variable.patch
+ patches.suse/scsi-hpsa-Use-the-bitmap-API-to-allocate-bitmaps.patch
+ patches.suse/scsi-hpsa-Simplify-clear-set-_bit-parameters.patch
patches.suse/msft-hv-2651-scsi-storvsc-Drop-DID_TARGET_FAILURE-use.patch
patches.suse/scsi-qla2xxx-drop-did_target_failure-use.patch
patches.suse/scsi-qla2xxx-fix-spelling-mistake-definiton-definition.patch
@@ -20811,10 +21522,13 @@
patches.suse/scsi-lpfc-add-reporting-capability-for-link-degrade-signaling.patch
patches.suse/scsi-lpfc-fix-various-issues-reported-by-tools.patch
patches.suse/scsi-lpfc-update-lpfc-version-to-14.2.0.7.patch
+ patches.suse/scsi-megaraid-Convert-sysfs-snprintf-to-sysfs_emit.patch
patches.suse/scsi-csiostor-Convert-sysfs-snprintf-to-sysfs_emit.patch
patches.suse/scsi-scsi_transport_fc-Use-u-for-dev_loss_tmo.patch
patches.suse/scsi-libsas-Fix-use-after-free-bug-in-smp_execute_task_sg.patch
patches.suse/scsi-qedf-Populate-sysfs-attributes-for-vport.patch
+ patches.suse/scsi-mpt3sas-Revert-scsi-mpt3sas-Fix-writel-use.patch
+ patches.suse/scsi-mpt3sas-Revert-scsi-mpt3sas-Fix-ioc-base_readl-.patch
patches.suse/scsi-mpi3mr-Schedule-IRQ-kthreads-only-on-non-RT-kernels.patch
patches.suse/scsi-stex-Properly-zero-out-the-passthrough-command-structure.patch
patches.suse/dmaengine-hisilicon-Disable-channels-when-unregister.patch
@@ -20946,6 +21660,7 @@
patches.suse/powerpc-mm-64s-Drop-pgd_huge.patch
patches.suse/ppc64-kdump-Limit-kdump-base-to-512MB.patch
patches.suse/powerpc-pseries-Move-vas_migration_handler-early-dur.patch
+ patches.suse/powerpc-boot-Explicitly-disable-usage-of-SPE-instruc.patch
patches.suse/powerpc-kprobes-Fix-null-pointer-reference-in-arch_p.patch
patches.suse/powerpc-Ignore-DSI-error-caused-by-the-copy-paste-in.patch
patches.suse/powerpc-pseries-vas-Pass-hw_cpu_id-to-node-associati.patch
@@ -20990,6 +21705,7 @@
patches.suse/msft-hv-2656-Drivers-hv-vmbus-Use-PCI_VENDOR_ID_MICROSOFT-for-bet.patch
patches.suse/msft-hv-2657-Drivers-hv-vmbus-Don-t-wait-for-the-ACPI-device-upon.patch
patches.suse/msft-hv-2658-scsi-storvsc-remove-an-extraneous-to-in-a-comment.patch
+ patches.suse/msft-hv-2667-hyperv-simplify-and-rename-generate_guest_id.patch
patches.suse/msft-hv-2669-drm-hyperv-Add-ratelimit-on-error-message.patch
patches.suse/mm-migration-fix-the-FOLL_GET-failure-on-following-huge-page.patch
patches.suse/Kselftests-remove-support-of-libhugetlbfs-from-kselftests.patch
@@ -21131,6 +21847,7 @@
patches.suse/i40e-Fix-DMA-mappings-leak.patch
patches.suse/sfc-Change-VF-mac-via-PF-as-first-preference-if-avai.patch
patches.suse/net-phy-dp83867-Extend-RX-strap-quirk-for-SGMII-mode.patch
+ patches.suse/net-smc-Fix-an-error-code-in-smc_lgr_create
patches.suse/bnxt_en-fix-memory-leak-in-bnxt_nvm_test.patch
patches.suse/sfc-include-vport_id-in-filter-spec-hash-and-equal.patch
patches.suse/wwan_hwsim-fix-possible-memory-leak-in-wwan_hwsim_de.patch
@@ -21138,6 +21855,7 @@
patches.suse/drm-amdgpu-set-vm_update_mode-0-as-default-for-Sienn.patch
patches.suse/drm-amdgpu-fix-sdma-doorbell-init-ordering-on-APUs.patch
patches.suse/gcov-support-GCC-12.1-and-newer-compilers.patch
+ patches.suse/nouveau-fix-migrate_to_ram-for-faulting-page.patch
patches.suse/selinux-enable-use-of-both-GFP_KERNEL-and-GFP_ATOMIC.patch
patches.suse/scsi-lpfc-Fix-memory-leak-in-lpfc_create_port.patch
patches.suse/cifs-Fix-xid-leak-in-cifs_create-.patch
@@ -21185,6 +21903,7 @@
patches.suse/KVM-x86-Add-compat-handler-for-KVM_X86_SET_MSR_FILTE.patch
patches.suse/pinctrl-Ingenic-JZ4755-bug-fixes.patch
patches.suse/net-mlx5e-Cleanup-MACsec-uninitialization-routine.patch
+ patches.suse/scsi-mpt3sas-re-do-lost-mpt3sas-DMA-mask-fix.patch
patches.suse/platform-x86-intel-pmc-core-Add-Raptor-Lake-support-.patch
patches.suse/media-vivid-s_fbuf-add-more-sanity-checks.patch
patches.suse/media-vivid-dev-bitmap_cap-wasn-t-freed-in-all-cases.patch
@@ -21259,6 +21978,7 @@
patches.suse/cifs-Fix-pages-array-leak-when-writedata-alloc-failed-in-cifs_write.patch
patches.suse/cifs-Fix-pages-leak-when-writedata-alloc-failed-in-cifs_write_from_.patch
patches.suse/cifs-fix-use-after-free-caused-by-invalid-pointer-hostname-.patch
+ patches.suse/usb-gadget-aspeed-Fix-probe-regression.patch
patches.suse/usb-typec-ucsi-Check-the-connection-on-resume.patch
patches.suse/usb-typec-ucsi-acpi-Implement-resume-callback.patch
patches.suse/usb-bdc-change-state-when-port-disconnected.patch
@@ -21302,6 +22022,7 @@
patches.suse/Bluetooth-L2CAP-Fix-memory-leak-in-vhci_write.patch
patches.suse/Bluetooth-L2CAP-Fix-accepting-connection-request-for.patch
patches.suse/Bluetooth-L2CAP-Fix-attempting-to-access-uninitializ.patch
+ patches.suse/net-smc-Fix-possible-leaked-pernet-namespace-in-smc_init
patches.suse/vsock-remove-the-unused-wait-in-vsock_connectible_re.patch
patches.suse/vsock-fix-possible-infinite-sleep-in-vsock_connectib.patch
patches.suse/selftests-pidfd_test-Remove-the-erroneous.patch
@@ -21328,6 +22049,7 @@
patches.suse/Documentation-devres-add-missing-I2C-helper.patch
patches.suse/arm64-entry-avoid-kprobe-recursion.patch
patches.suse/ring-buffer-Check-for-NULL-cpu_buffer-in-ring_buffer.patch
+ patches.suse/ftrace-Fix-use-after-free-for-dynamic-ftrace_ops.patch
patches.suse/tracing-kprobe-Fix-memory-leak-in-test_gen_kprobe-kretprobe_cmd.patch
patches.suse/cifs-always-iterate-smb-sessions-using-primary-channel.patch
patches.suse/cifs-avoid-unnecessary-iteration-of-tcp-sessions.patch
@@ -21361,6 +22083,7 @@
patches.suse/ALSA-usb-audio-Add-quirk-entry-for-M-Audio-Micro.patch
patches.suse/ALSA-usb-audio-Add-DSD-support-for-Accuphase-DAC-60.patch
patches.suse/ALSA-hda-realtek-Add-Positivo-C6300-model-quirk.patch
+ patches.suse/ALSA-memalloc-Don-t-fall-back-for-SG-buffer-with-IOM.patch
patches.suse/ALSA-hda-fix-potential-memleak-in-add_widget_node.patch
patches.suse/msft-hv-2675-HID-hyperv-fix-possible-memory-leak-in-mousevsc_prob.patch
patches.suse/mmc-cqhci-Provide-helper-for-resetting-both-SDHCI-an.patch
@@ -21370,6 +22093,7 @@
patches.suse/mmc-sdhci_am654-Fix-SDHCI_RESET_ALL-for-CQHCI.patch
patches.suse/mmc-sdhci-esdhc-imx-use-the-correct-host-caps-for-MM.patch
patches.suse/spi-stm32-Print-summary-callbacks-suppressed-message.patch
+ patches.suse/dmaengine-idxd-Do-not-enable-user-type-Work-Queue-wi.patch
patches.suse/dmaengine-pxa_dma-use-platform_get_irq_optional.patch
patches.suse/dmaengine-mv_xor_v2-Fix-a-resource-leak-in-mv_xor_v2.patch
patches.suse/dmaengine-ti-k3-udma-glue-fix-memory-leak-when-regis.patch
@@ -21406,6 +22130,7 @@
patches.suse/ata-libata-transport-fix-error-handling-in-ata_tport.patch
patches.suse/ata-libata-transport-fix-error-handling-in-ata_tlink.patch
patches.suse/ata-libata-transport-fix-error-handling-in-ata_tdev_.patch
+ patches.suse/ALSA-memalloc-Try-dma_alloc_noncontiguous-at-first.patch
patches.suse/scsi-ibmvfc-Avoid-path-failures-during-live-migratio.patch
patches.suse/scsi-scsi_transport_sas-Fix-error-handling-in-sas_phy_add.patch
patches.suse/arm64-efi-Fix-handling-of-misaligned-runtime-regions.patch
@@ -21434,6 +22159,7 @@
patches.suse/ASoC-tas2770-Fix-set_tdm_slot-in-case-of-single-slot.patch
patches.suse/ASoC-tas2764-Fix-set_tdm_slot-in-case-of-single-slot.patch
patches.suse/ASoC-soc-utils-Remove-__exit-for-snd_soc_util_exit.patch
+ patches.suse/ASoC-SOF-topology-No-need-to-assign-core-ID-if-token.patch
patches.suse/iio-trigger-sysfs-fix-possible-memory-leak-in-iio_sy.patch
patches.suse/iio-pressure-ms5611-changed-hardcoded-SPI-speed-to-v.patch
patches.suse/iio-adc-mp2629-fix-wrong-comparison-of-channel.patch
@@ -21457,6 +22183,7 @@
patches.suse/serial-8250-Flush-DMA-Rx-on-RLSI.patch
patches.suse/Revert-usb-dwc3-disable-USB-core-PHY-management.patch
patches.suse/usb-typec-mux-Enter-safe-mode-only-when-pins-need-to.patch
+ patches.suse/usb-typec-tipd-Prevent-uninitialized-event-1-2-in-IR.patch
patches.suse/usb-chipidea-fix-deadlock-in-ci_otg_del_timer.patch
patches.suse/drm-vc4-kms-Fix-IS_ERR-vs-NULL-check-for-vc4_kms.patch
patches.suse/drm-panel-simple-set-bpc-field-for-logic-technologie.patch
@@ -21464,6 +22191,17 @@
patches.suse/drm-Fix-potential-null-ptr-deref-in-drm_vblank_destr.patch
patches.suse/Input-iforce-invert-valid-length-check-when-fetching.patch
patches.suse/Input-i8042-fix-leaking-of-platform-device-on-module.patch
+ patches.suse/tracing-ring-buffer-Have-polling-block-on-watermark.patch
+ patches.suse/ring-buffer-Include-dropped-pages-in-counting-dirty-patches.patch
+ patches.suse/tracing-Fix-memory-leak-in-tracing_read_pipe.patch
+ patches.suse/ftrace-Fix-the-possible-incorrect-kernel-message.patch
+ patches.suse/ftrace-Optimize-the-allocation-for-mcount-entries.patch
+ patches.suse/ring_buffer-Do-not-deactivate-non-existant-pages.patch
+ patches.suse/ftrace-Fix-null-pointer-dereference-in-ftrace_add_mod.patch
+ patches.suse/tracing-Fix-memory-leak-in-test_gen_synth_cmd-and-test_empty_synth_event.patch
+ patches.suse/tracing-Fix-wild-memory-access-in-register_synth_event.patch
+ patches.suse/tracing-kprobe-Fix-potential-null-ptr-deref-on-trace_event_file-in-kprobe_event_gen_test_exit.patch
+ patches.suse/tracing-kprobe-Fix-potential-null-ptr-deref-on-trace_array-in-kprobe_event_gen_test_exit.patch

# jejb/scsi for-next
patches.suse/scsi-lpfc-Set-sli4_param-s-cmf-option-to-zero-when-C.patch
@@ -21572,7 +22310,6 @@

# s390x
patches.suse/s390-qeth-remove-OSN-deprecation-notice.patch
- patches.suse/s390-block-xpram-include-major-h.patch
+ptesarik +sp4_needs_review patches.suse/s390-sles15sp2-kdump-fix-out-of-memory-with-PCI.patch

# ppc64
@@ -21797,6 +22534,7 @@
patches.suse/drivers-base-implement-dev_enable_async_probe.patch
patches.suse/scsi-add-disable_async_probing-module-argument.patch
patches.suse/scsi-lpfc-update-the-obsolete-adapter-list.patch
+ patches.suse/scsi-do-not-put-scsi_common-in-a-separate-module.patch

# CD-ROM
patches.suse/cdrom-add-poll_event_interruptible.patch
@@ -21840,6 +22578,9 @@
# bsc#1198438: introduces a regression, revert it for the time being
patches.suse/revert-scsi-qla2xxx-Changes-to-support-FCP2-Target.patch

+ # bsc#1189297
+ patches.suse/scsi_probe_lun-retry-after-timeout.patch
+
########################################################
# Networking drivers (wired)
########################################################