Blob Blame History Raw
From: Shaohua Li <shli@fb.com>
Date: Wed, 20 Dec 2017 11:10:17 -0700
Subject: [PATCH] block-throttle: avoid double charge
References: bsc#1077989
Git-commit: 111be883981748acc9a56e855c8336404a8e787c
Patch-mainline: v4.15-rc5

If a bio is throttled and split after throttling, the bio could be
resubmited and enters the throttling again. This will cause part of the
bio to be charged multiple times. If the cgroup has an IO limit, the
double charge will significantly harm the performance. The bio split
becomes quite common after arbitrary bio size change.

To fix this, we always set the BIO_THROTTLED flag if a bio is throttled.
If the bio is cloned/split, we copy the flag to new bio too to avoid a
double charge. However, cloned bio could be directed to a new disk,
keeping the flag be a problem. The observation is we always set new disk
for the bio in this case, so we can clear the flag in bio_set_dev().

This issue exists for a long time, arbitrary bio size change just makes
it worse, so this should go into stable at least since v4.2.

V1-> V2: Not add extra field in bio based on discussion with Tejun

Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: stable@vger.kernel.org
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 block/bio.c               | 2 ++
 block/blk-throttle.c      | 8 +-------
 include/linux/bio.h       | 2 ++
 include/linux/blk_types.h | 9 ++++-----
 4 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 5a767401012d..6a9680972e2d 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 	bio->bi_disk = bio_src->bi_disk;
 	bio->bi_partno = bio_src->bi_partno;
 	bio_set_flag(bio, BIO_CLONED);
+	if (bio_flagged(bio_src, BIO_THROTTLED))
+		bio_set_flag(bio, BIO_THROTTLED);
 	bio->bi_opf = bio_src->bi_opf;
 	bio->bi_write_hint = bio_src->bi_write_hint;
 	bio->bi_iter = bio_src->bi_iter;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a7285bf2831c..8aa50daa4569 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2219,13 +2219,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 out_unlock:
 	spin_unlock_irq(q->queue_lock);
 out:
-	/*
-	 * As multiple blk-throtls may stack in the same issue path, we
-	 * don't want bios to leave with the flag set.  Clear the flag if
-	 * being issued.
-	 */
-	if (!throttled)
-		bio_clear_flag(bio, BIO_THROTTLED);
+	bio_set_flag(bio, BIO_THROTTLED);
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	if (throttled || !td->track_bio_latency)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 7c36bc7b4071..44b4947890cf 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -497,6 +497,8 @@ extern const char *bio_devname(struct bio *bio, char *buffer);
 
 #define bio_set_dev(bio, bdev) 			\
 do {						\
+	if ((bio)->bi_disk != (bdev)->bd_disk)	\
+		bio_clear_flag(bio, BIO_THROTTLED);\
 	(bio)->bi_disk = (bdev)->bd_disk;	\
 	(bio)->bi_partno = (bdev)->bd_partno;	\
 } while (0)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index da01189249db..f6ea9d9656b5 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -77,8 +77,6 @@ struct blk_issue_stat {
 struct bio {
 	struct bio		*bi_next;	/* request queue link */
 	struct gendisk		*bi_disk;
-	u8			bi_partno;
-	blk_status_t		bi_status;
 	unsigned int		bi_opf;		/* bottom bits req flags,
 						 * top bits REQ_OP. Use
 						 * accessors.
@@ -86,8 +84,8 @@ struct bio {
 	unsigned short		bi_flags;	/* status, etc and bvec pool number */
 	unsigned short		bi_ioprio;
 	unsigned short		bi_write_hint;
-
-	struct bvec_iter	bi_iter;
+	blk_status_t		bi_status;
+	u8			bi_partno;
 
 	/* Number of segments in this BIO after
 	 * physical address coalescing is performed.
@@ -101,8 +99,9 @@ struct bio {
 	unsigned int		bi_seg_front_size;
 	unsigned int		bi_seg_back_size;
 
-	atomic_t		__bi_remaining;
+	struct bvec_iter	bi_iter;
 
+	atomic_t		__bi_remaining;
 	bio_end_io_t		*bi_end_io;
 
 	void			*bi_private;
-- 
2.12.3