Qu Wenruo 689ef1
From ff6bc37eb7f6e7b052e50c13a480e1080b3ec07a Mon Sep 17 00:00:00 2001
Qu Wenruo 689ef1
From: Qu Wenruo <wqu@suse.com>
Qu Wenruo 689ef1
Date: Thu, 21 Dec 2017 13:42:04 +0800
Qu Wenruo 689ef1
Patch-mainline: v4.17-rc2
Qu Wenruo 689ef1
Git-commit: ff6bc37eb7f6e7b052e50c13a480e1080b3ec07a
Qu Wenruo 689ef1
References: bsc#1031392
Qu Wenruo 689ef1
Subject: [PATCH 30/31] btrfs: qgroup: Use independent and accurate per inode
Qu Wenruo 689ef1
 qgroup rsv
Qu Wenruo 689ef1
Qu Wenruo 689ef1
Unlike reservation calculation used in inode rsv for metadata, qgroup
Qu Wenruo 689ef1
doesn't really need to care about things like csum size or extent usage
Qu Wenruo 689ef1
for the whole tree COW.
Qu Wenruo 689ef1
Qu Wenruo 689ef1
Qgroups care more about net change of the extent usage.
Qu Wenruo 689ef1
That is to say, if we're going to insert one file extent, it will mostly
Qu Wenruo 689ef1
find its place in a COWed tree block, leaving no change in extent usage.
Qu Wenruo 689ef1
Or it causes a leaf split, resulting in one new net extent and increasing
Qu Wenruo 689ef1
the qgroup number by nodesize.
Qu Wenruo 689ef1
Or, in an even rarer case, it increases the tree level, increasing the qgroup
Qu Wenruo 689ef1
number by 2 * nodesize.
Qu Wenruo 689ef1
Qu Wenruo 689ef1
So here, unlike the complicated calculation used for the extent
Qu Wenruo 689ef1
allocator, which cares more about accuracy and never failing, qgroup doesn't
Qu Wenruo 689ef1
need that over-estimated reservation.
Qu Wenruo 689ef1
Qu Wenruo 689ef1
This patch maintains 2 new members in the btrfs_block_rsv structure for
Qu Wenruo 689ef1
qgroup, using a much smaller calculation for qgroup rsv, reducing false
Qu Wenruo 689ef1
EDQUOT errors.
Qu Wenruo 689ef1
Qu Wenruo 689ef1
Signed-off-by: David Sterba <dsterba@suse.com>
Qu Wenruo 689ef1
Signed-off-by: Qu Wenruo <wqu@suse.com>
Qu Wenruo 689ef1
---
Qu Wenruo 689ef1
 fs/btrfs/ctree.h       | 19 ++++++++++++++
Qu Wenruo 689ef1
 fs/btrfs/extent-tree.c | 57 ++++++++++++++++++++++++++++++++++--------
Qu Wenruo 689ef1
 2 files changed, 65 insertions(+), 11 deletions(-)
Qu Wenruo 689ef1
Qu Wenruo 689ef1
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
Qu Wenruo 689ef1
index ec84e2dabb04..2771cc56a622 100644
Qu Wenruo 689ef1
--- a/fs/btrfs/ctree.h
Qu Wenruo 689ef1
+++ b/fs/btrfs/ctree.h
Qu Wenruo 689ef1
@@ -459,6 +459,25 @@ struct btrfs_block_rsv {
Qu Wenruo 689ef1
 	unsigned short full;
Qu Wenruo 689ef1
 	unsigned short type;
Qu Wenruo 689ef1
 	unsigned short failfast;
Qu Wenruo 689ef1
+
Qu Wenruo 689ef1
+	/*
Qu Wenruo 689ef1
+	 * Qgroup equivalent for @size @reserved
Qu Wenruo 689ef1
+	 *
Qu Wenruo 689ef1
+	 * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care
Qu Wenruo 689ef1
+	 * about things like csum size nor how many tree blocks it will need to
Qu Wenruo 689ef1
+	 * reserve.
Qu Wenruo 689ef1
+	 *
Qu Wenruo 689ef1
+	 * Qgroup cares more about net change of the extent usage.
Qu Wenruo 689ef1
+	 *
Qu Wenruo 689ef1
+	 * So for one newly inserted file extent, in worst case it will cause
Qu Wenruo 689ef1
+	 * leaf split and level increase, nodesize for each file extent is
Qu Wenruo 689ef1
+	 * already too much.
Qu Wenruo 689ef1
+	 *
Qu Wenruo 689ef1
+	 * In short, qgroup_size/reserved is the upper limit of possible needed
Qu Wenruo 689ef1
+	 * qgroup metadata reservation.
Qu Wenruo 689ef1
+	 */
Qu Wenruo 689ef1
+	u64 qgroup_rsv_size;
Qu Wenruo 689ef1
+	u64 qgroup_rsv_reserved;
Qu Wenruo 689ef1
 };
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
 /*
Qu Wenruo 689ef1
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
Qu Wenruo 689ef1
index 5bdb5636d552..055494ddcace 100644
Qu Wenruo 689ef1
--- a/fs/btrfs/extent-tree.c
Qu Wenruo 689ef1
+++ b/fs/btrfs/extent-tree.c
Qu Wenruo 689ef1
@@ -5560,14 +5560,18 @@ static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
 static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
Qu Wenruo 689ef1
 				    struct btrfs_block_rsv *block_rsv,
Qu Wenruo 689ef1
-				    struct btrfs_block_rsv *dest, u64 num_bytes)
Qu Wenruo 689ef1
+				    struct btrfs_block_rsv *dest, u64 num_bytes,
Qu Wenruo 689ef1
+				    u64 *qgroup_to_release_ret)
Qu Wenruo 689ef1
 {
Qu Wenruo 689ef1
 	struct btrfs_space_info *space_info = block_rsv->space_info;
Qu Wenruo 689ef1
+	u64 qgroup_to_release = 0;
Qu Wenruo 689ef1
 	u64 ret;
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
 	spin_lock(&block_rsv->lock);
Qu Wenruo 689ef1
-	if (num_bytes == (u64)-1)
Qu Wenruo 689ef1
+	if (num_bytes == (u64)-1) {
Qu Wenruo 689ef1
 		num_bytes = block_rsv->size;
Qu Wenruo 689ef1
+		qgroup_to_release = block_rsv->qgroup_rsv_size;
Qu Wenruo 689ef1
+	}
Qu Wenruo 689ef1
 	block_rsv->size -= num_bytes;
Qu Wenruo 689ef1
 	if (block_rsv->reserved >= block_rsv->size) {
Qu Wenruo 689ef1
 		num_bytes = block_rsv->reserved - block_rsv->size;
Qu Wenruo 689ef1
@@ -5576,6 +5580,13 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
Qu Wenruo 689ef1
 	} else {
Qu Wenruo 689ef1
 		num_bytes = 0;
Qu Wenruo 689ef1
 	}
Qu Wenruo 689ef1
+	if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
Qu Wenruo 689ef1
+		qgroup_to_release = block_rsv->qgroup_rsv_reserved -
Qu Wenruo 689ef1
+				    block_rsv->qgroup_rsv_size;
Qu Wenruo 689ef1
+		block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
Qu Wenruo 689ef1
+	} else {
Qu Wenruo 689ef1
+		qgroup_to_release = 0;
Qu Wenruo 689ef1
+	}
Qu Wenruo 689ef1
 	spin_unlock(&block_rsv->lock);
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
 	ret = num_bytes;
Qu Wenruo 689ef1
@@ -5598,6 +5609,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
Qu Wenruo 689ef1
 			space_info_add_old_bytes(fs_info, space_info,
Qu Wenruo 689ef1
 						 num_bytes);
Qu Wenruo 689ef1
 	}
Qu Wenruo 689ef1
+	if (qgroup_to_release_ret)
Qu Wenruo 689ef1
+		*qgroup_to_release_ret = qgroup_to_release;
Qu Wenruo 689ef1
 	return ret;
Qu Wenruo 689ef1
 }
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
@@ -5739,17 +5752,21 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
Qu Wenruo 689ef1
 	struct btrfs_root *root = inode->root;
Qu Wenruo 689ef1
 	struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
Qu Wenruo 689ef1
 	u64 num_bytes = 0;
Qu Wenruo 689ef1
+	u64 qgroup_num_bytes = 0;
Qu Wenruo 689ef1
 	int ret = -ENOSPC;
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
 	spin_lock(&block_rsv->lock);
Qu Wenruo 689ef1
 	if (block_rsv->reserved < block_rsv->size)
Qu Wenruo 689ef1
 		num_bytes = block_rsv->size - block_rsv->reserved;
Qu Wenruo 689ef1
+	if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
Qu Wenruo 689ef1
+		qgroup_num_bytes = block_rsv->qgroup_rsv_size -
Qu Wenruo 689ef1
+				   block_rsv->qgroup_rsv_reserved;
Qu Wenruo 689ef1
 	spin_unlock(&block_rsv->lock);
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
 	if (num_bytes == 0)
Qu Wenruo 689ef1
 		return 0;
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
-	ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
Qu Wenruo 689ef1
+	ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true);
Qu Wenruo 689ef1
 	if (ret)
Qu Wenruo 689ef1
 		return ret;
Qu Wenruo 689ef1
 	ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
Qu Wenruo 689ef1
@@ -5757,7 +5774,13 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
Qu Wenruo 689ef1
 		block_rsv_add_bytes(block_rsv, num_bytes, 0);
Qu Wenruo 689ef1
 		trace_btrfs_space_reservation(root->fs_info, "delalloc",
Qu Wenruo 689ef1
 					      btrfs_ino(inode), num_bytes, 1);
Qu Wenruo 689ef1
-	}
Qu Wenruo 689ef1
+
Qu Wenruo 689ef1
+		/* Don't forget to increase qgroup_rsv_reserved */
Qu Wenruo 689ef1
+		spin_lock(&block_rsv->lock);
Qu Wenruo 689ef1
+		block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
Qu Wenruo 689ef1
+		spin_unlock(&block_rsv->lock);
Qu Wenruo 689ef1
+	} else
Qu Wenruo 689ef1
+		btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
Qu Wenruo 689ef1
 	return ret;
Qu Wenruo 689ef1
 }
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
@@ -5778,20 +5801,23 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
Qu Wenruo 689ef1
 	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
Qu Wenruo 689ef1
 	struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
Qu Wenruo 689ef1
 	u64 released = 0;
Qu Wenruo 689ef1
+	u64 qgroup_to_release = 0;
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
 	/*
Qu Wenruo 689ef1
 	 * Since we statically set the block_rsv->size we just want to say we
Qu Wenruo 689ef1
 	 * are releasing 0 bytes, and then we'll just get the reservation over
Qu Wenruo 689ef1
 	 * the size free'd.
Qu Wenruo 689ef1
 	 */
Qu Wenruo 689ef1
-	released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0);
Qu Wenruo 689ef1
+	released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0,
Qu Wenruo 689ef1
+					   &qgroup_to_release);
Qu Wenruo 689ef1
 	if (released > 0)
Qu Wenruo 689ef1
 		trace_btrfs_space_reservation(fs_info, "delalloc",
Qu Wenruo 689ef1
 					      btrfs_ino(inode), released, 0);
Qu Wenruo 689ef1
 	if (qgroup_free)
Qu Wenruo 689ef1
-		btrfs_qgroup_free_meta_prealloc(inode->root, released);
Qu Wenruo 689ef1
+		btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
Qu Wenruo 689ef1
 	else
Qu Wenruo 689ef1
-		btrfs_qgroup_convert_reserved_meta(inode->root, released);
Qu Wenruo 689ef1
+		btrfs_qgroup_convert_reserved_meta(inode->root,
Qu Wenruo 689ef1
+						   qgroup_to_release);
Qu Wenruo 689ef1
 }
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
 void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
Qu Wenruo 689ef1
@@ -5803,7 +5829,7 @@ void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
Qu Wenruo 689ef1
 	if (global_rsv == block_rsv ||
Qu Wenruo 689ef1
 	    block_rsv->space_info != global_rsv->space_info)
Qu Wenruo 689ef1
 		global_rsv = NULL;
Qu Wenruo 689ef1
-	block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes);
Qu Wenruo 689ef1
+	block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL);
Qu Wenruo 689ef1
 }
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
 static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
Qu Wenruo 689ef1
@@ -5883,7 +5909,7 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
Qu Wenruo 689ef1
 static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
Qu Wenruo 689ef1
 {
Qu Wenruo 689ef1
 	block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
Qu Wenruo 689ef1
-				(u64)-1);
Qu Wenruo 689ef1
+				(u64)-1, NULL);
Qu Wenruo 689ef1
 	WARN_ON(fs_info->trans_block_rsv.size > 0);
Qu Wenruo 689ef1
 	WARN_ON(fs_info->trans_block_rsv.reserved > 0);
Qu Wenruo 689ef1
 	WARN_ON(fs_info->chunk_block_rsv.size > 0);
Qu Wenruo 689ef1
@@ -5907,7 +5933,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
Qu Wenruo 689ef1
 	WARN_ON_ONCE(!list_empty(&trans->new_bgs));
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
 	block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
Qu Wenruo 689ef1
-				trans->chunk_bytes_reserved);
Qu Wenruo 689ef1
+				trans->chunk_bytes_reserved, NULL);
Qu Wenruo 689ef1
 	trans->chunk_bytes_reserved = 0;
Qu Wenruo 689ef1
 }
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
@@ -6012,6 +6038,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
Qu Wenruo 689ef1
 {
Qu Wenruo 689ef1
 	struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
Qu Wenruo 689ef1
 	u64 reserve_size = 0;
Qu Wenruo 689ef1
+	u64 qgroup_rsv_size = 0;
Qu Wenruo 689ef1
 	u64 csum_leaves;
Qu Wenruo 689ef1
 	unsigned outstanding_extents;
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
@@ -6024,9 +6051,17 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
Qu Wenruo 689ef1
 						 inode->csum_bytes);
Qu Wenruo 689ef1
 	reserve_size += btrfs_calc_trans_metadata_size(fs_info,
Qu Wenruo 689ef1
 						       csum_leaves);
Qu Wenruo 689ef1
+	/*
Qu Wenruo 689ef1
+	 * For qgroup rsv, the calculation is very simple:
Qu Wenruo 689ef1
+	 * account one nodesize for each outstanding extent
Qu Wenruo 689ef1
+	 *
Qu Wenruo 689ef1
+	 * This is overestimating in most cases.
Qu Wenruo 689ef1
+	 */
Qu Wenruo 689ef1
+	qgroup_rsv_size = outstanding_extents * fs_info->nodesize;
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
 	spin_lock(&block_rsv->lock);
Qu Wenruo 689ef1
 	block_rsv->size = reserve_size;
Qu Wenruo 689ef1
+	block_rsv->qgroup_rsv_size = qgroup_rsv_size;
Qu Wenruo 689ef1
 	spin_unlock(&block_rsv->lock);
Qu Wenruo 689ef1
 }
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
@@ -8405,7 +8440,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
Qu Wenruo 689ef1
 			    struct btrfs_block_rsv *block_rsv, u32 blocksize)
Qu Wenruo 689ef1
 {
Qu Wenruo 689ef1
 	block_rsv_add_bytes(block_rsv, blocksize, 0);
Qu Wenruo 689ef1
-	block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
Qu Wenruo 689ef1
+	block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
Qu Wenruo 689ef1
 }
Qu Wenruo 689ef1
 
Qu Wenruo 689ef1
 /*
Qu Wenruo 689ef1
-- 
Qu Wenruo 689ef1
2.19.0
Qu Wenruo 689ef1