Blob Blame History Raw
From 8a3cba87177f9c2ff35ef17603201b93b1ce7cab Mon Sep 17 00:00:00 2001
From: Michal Suchanek <msuchanek@suse.de>
Date: Tue, 8 Oct 2019 19:28:43 +0200
Subject: [PATCH 5/7] bdev: add open_finish

References: bsc#1048585
Patch-mainline: submitted https://lore.kernel.org/lkml/cover.1571834862.git.msuchanek@suse.de/

Opening a block device may require a long operation such as waiting for
the cdrom tray to close. Performing this operation while holding locks
blocks other attempts to open the device, and the processes waiting to
open the device are not killable.

To avoid this issue and still be able to perform time-consuming checks
at open() time, the block device driver can provide open_finish(). If it
does, opening the device proceeds even when an error is returned from
open(): bd_mutex is released and open_finish() is called. If
open_finish() succeeds, the device is now open; if it fails, release() is
called.

When -ERESTARTSYS is returned from open(), blkdev_get may loop without
calling open_finish(); open_finish() is never called for -ERESTARTSYS.

When -ENXIO is returned, there is no device to retry opening, so this
error needs to be honored and open_finish() must not be called.

Move a ret = 0 assignment up in the if/else branching to avoid returning
-ENXIO. Previously the return value was ignored on the unhandled branch.

2020-07-18: work around calling bdput(bdev) twice (once in blkdev_put(),
once in blkdev_get()) on error after mainline commit 2d3a8e2dedde ("block:
Fix use-after-free in blkdev_get()").

Signed-off-by: Michal Suchanek <msuchanek@suse.de>
---
v2: new patch
v4:
 - fix crash on ENXIO
 - reset first_open on looping to avoid reference leak
 - initialize capacity and partitions after open_finish
---
 fs/block_dev.c         | 37 +++++++++++++++++++++++++++++++++----
 include/linux/blkdev.h |  1 +
 2 files changed, 34 insertions(+), 4 deletions(-)

--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1536,6 +1536,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	int partno;
 	int perm = 0;
 	bool first_open = false;
+	bool need_finish = false;
 
 	if (mode & FMODE_READ)
 		perm |= MAY_READ;
@@ -1553,6 +1554,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
  restart:
 
 	ret = -ENXIO;
+	first_open = false;
 	disk = bdev_get_gendisk(bdev, &partno);
 	if (!disk)
 		goto out;
@@ -1589,6 +1591,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 					put_disk_and_module(disk);
 					goto restart;
 				}
+				if ((ret != -ENXIO) &&
+				    bdev->bd_disk->fops->open_finish)
+					need_finish = true;
 			}
 
 			if (!ret) {
@@ -1606,7 +1611,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			    (!ret || ret == -ENOMEDIUM))
 				bdev_disk_changed(bdev, ret == -ENOMEDIUM);
 
-			if (ret)
+			if (ret && !need_finish)
 				goto out_clear;
 		} else {
 			struct block_device *whole;
@@ -1634,15 +1639,19 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 		if (bdev->bd_bdi == &noop_backing_dev_info)
 			bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
 	} else {
+		ret = 0;
 		if (bdev->bd_contains == bdev) {
-			ret = 0;
-			if (bdev->bd_disk->fops->open)
+			if (bdev->bd_disk->fops->open) {
 				ret = bdev->bd_disk->fops->open(bdev, mode);
+				if ((ret != -ERESTARTSYS) && (ret != -ENXIO) &&
+				    bdev->bd_disk->fops->open_finish)
+					need_finish = true;
+			}
 			/* the same as first opener case, read comment there */
 			if (bdev->bd_invalidated &&
 			    (!ret || ret == -ENOMEDIUM))
 				bdev_disk_changed(bdev, ret == -ENOMEDIUM);
-			if (ret)
+			if (ret && !need_finish)
 				goto out_unlock_bdev;
 		}
 	}
@@ -1654,6 +1663,22 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	/* only one opener holds refs to the module and disk */
 	if (!first_open)
 		put_disk_and_module(disk);
+	if (ret && need_finish) {
+		ret = bdev->bd_disk->fops->open_finish(bdev, mode, ret);
+
+		if (!ret && first_open) {
+			bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+			set_init_blocksize(bdev);
+		}
+		/* the same as first opener case, read comment there */
+		if (bdev->bd_invalidated)
+			bdev_disk_changed(bdev, ret == -ENOMEDIUM);
+	}
+	if (ret) {
+		bdgrab(bdev);	/* workaround after commit 2d3a8e2dedde */
+		__blkdev_put(bdev, mode, for_part);
+		return ret;
+	}
 	return 0;
 
  out_clear:
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1695,6 +1695,7 @@ static inline struct bio_vec *rq_integrity_vec(struct request *rq)
 struct block_device_operations {
 	blk_qc_t (*submit_bio) (struct bio *bio);
 	int (*open) (struct block_device *, fmode_t);
+	int (*open_finish)(struct block_device *bdev, fmode_t mode, int ret);
 	void (*release) (struct gendisk *, fmode_t);
 	int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);