Blob Blame History Raw
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 7 Apr 2020 11:38:49 +0100
Subject: btrfs: fix reclaim counter leak of space_info objects
Git-commit: d611add48b717ae34f59e0f474bfa7a7d840c4c4
Patch-mainline: v5.7-rc2
References: bsc#1178897

Whenever we add a ticket to a space_info object we increment the object's
reclaim_size counter witht the ticket's bytes, and we decrement it with
the corresponding amount only when we are able to grant the requested
space to the ticket. When we are not able to grant the space to a ticket,
or when the ticket is removed due to a signal (e.g. an application has
received sigterm from the terminal) we never decrement the counter with
the corresponding bytes from the ticket. This leak can result in the
space reclaim code to later do much more work than necessary. So fix it
by decrementing the counter when those two cases happen as well.

Fixes: db161806dc5615 ("btrfs: account ticket size at add/delete time")
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Acked-by: Nikolay Borisov <nborisov@suse.com>
---
 fs/btrfs/block-group.c |    1 +
 fs/btrfs/space-info.c  |   20 ++++++++++++++------
 2 files changed, 15 insertions(+), 6 deletions(-)

--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -3184,6 +3184,7 @@ int btrfs_free_block_groups(struct btrfs
 			    space_info->bytes_reserved > 0 ||
 			    space_info->bytes_may_use > 0))
 			btrfs_dump_space_info(info, space_info, 0, 0);
+		WARN_ON(space_info->reclaim_size > 0);
 		list_del(&space_info->list);
 		for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
 			struct kobject *kobj;
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -239,6 +239,16 @@ int btrfs_can_overcommit(struct btrfs_fs
 	return 0;
 }
 
+static void remove_ticket(struct btrfs_space_info *space_info,
+			  struct reserve_ticket *ticket)
+{
+	if (!list_empty(&ticket->list)) {
+		list_del_init(&ticket->list);
+		ASSERT(space_info->reclaim_size >= ticket->bytes);
+		space_info->reclaim_size -= ticket->bytes;
+	}
+}
+
 /*
  * This is for space we already have accounted in space_info->bytes_may_use, so
  * basically when we're returning space from block_rsv's.
@@ -265,9 +275,7 @@ again:
 			btrfs_space_info_update_bytes_may_use(fs_info,
 							      space_info,
 							      ticket->bytes);
-			list_del_init(&ticket->list);
-			ASSERT(space_info->reclaim_size >= ticket->bytes);
-			space_info->reclaim_size -= ticket->bytes;
+			remove_ticket(space_info, ticket);
 			ticket->bytes = 0;
 			space_info->tickets_id++;
 			wake_up(&ticket->wait);
@@ -766,7 +774,7 @@ static bool maybe_fail_all_tickets(struc
 			btrfs_info(fs_info, "failing ticket with %llu bytes",
 				   ticket->bytes);
 
-		list_del_init(&ticket->list);
+		remove_ticket(space_info, ticket);
 		ticket->error = -ENOSPC;
 		wake_up(&ticket->wait);
 
@@ -935,7 +943,7 @@ static void wait_reserve_ticket(struct b
 			 * despite getting an error, resulting in a space leak
 			 * (bytes_may_use counter of our space_info).
 			 */
-			list_del_init(&ticket->list);
+			remove_ticket(space_info, ticket);
 			ticket->error = -EINTR;
 			break;
 		}
@@ -993,7 +1001,7 @@ static int handle_reserve_ticket(struct
 		 * either the async reclaim job deletes the ticket from the list
 		 * or we delete it ourselves at wait_reserve_ticket().
 		 */
-		list_del_init(&ticket->list);
+		remove_ticket(space_info, ticket);
 		if (!ret)
 			ret = -ENOSPC;
 	}