|
Qu Wenruo |
689ef1 |
From ff6bc37eb7f6e7b052e50c13a480e1080b3ec07a Mon Sep 17 00:00:00 2001
|
|
Qu Wenruo |
689ef1 |
From: Qu Wenruo <wqu@suse.com>
|
|
Qu Wenruo |
689ef1 |
Date: Thu, 21 Dec 2017 13:42:04 +0800
|
|
Qu Wenruo |
689ef1 |
Patch-mainline: v4.17-rc2
|
|
Qu Wenruo |
689ef1 |
Git-commit: ff6bc37eb7f6e7b052e50c13a480e1080b3ec07a
|
|
Qu Wenruo |
689ef1 |
References: bsc#1031392
|
|
Qu Wenruo |
689ef1 |
Subject: [PATCH 30/31] btrfs: qgroup: Use independent and accurate per inode
|
|
Qu Wenruo |
689ef1 |
qgroup rsv
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
Unlike reservation calculation used in inode rsv for metadata, qgroup
|
|
Qu Wenruo |
689ef1 |
doesn't really need to care about things like csum size or extent usage
|
|
Qu Wenruo |
689ef1 |
for the whole tree COW.
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
Qgroups care more about net change of the extent usage.
|
|
Qu Wenruo |
689ef1 |
That's to say, if we're going to insert one file extent, it will mostly
|
|
Qu Wenruo |
689ef1 |
find its place in COWed tree block, leaving no change in extent usage.
|
|
Qu Wenruo |
689ef1 |
Or causing a leaf split, resulting in one new net extent and increasing
|
|
Qu Wenruo |
689ef1 |
qgroup number by nodesize.
|
|
Qu Wenruo |
689ef1 |
Or in an even rarer case, increase the tree level, increasing qgroup
|
|
Qu Wenruo |
689ef1 |
number by 2 * nodesize.
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
So here instead of using the complicated calculation for extent
|
|
Qu Wenruo |
689ef1 |
allocator, which cares more about accuracy and no error, qgroup doesn't
|
|
Qu Wenruo |
689ef1 |
need that over-estimated reservation.
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
This patch will maintain 2 new members in btrfs_block_rsv structure for
|
|
Qu Wenruo |
689ef1 |
qgroup, using much smaller calculation for qgroup rsv, reducing false
|
|
Qu Wenruo |
689ef1 |
EDQUOT.
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
Signed-off-by: David Sterba <dsterba@suse.com>
|
|
Qu Wenruo |
689ef1 |
Signed-off-by: Qu Wenruo <wqu@suse.com>
|
|
Qu Wenruo |
689ef1 |
---
|
|
Qu Wenruo |
689ef1 |
fs/btrfs/ctree.h | 19 ++++++++++++++
|
|
Qu Wenruo |
689ef1 |
fs/btrfs/extent-tree.c | 57 ++++++++++++++++++++++++++++++++++--------
|
|
Qu Wenruo |
689ef1 |
2 files changed, 65 insertions(+), 11 deletions(-)
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
|
|
Qu Wenruo |
689ef1 |
index ec84e2dabb04..2771cc56a622 100644
|
|
Qu Wenruo |
689ef1 |
--- a/fs/btrfs/ctree.h
|
|
Qu Wenruo |
689ef1 |
+++ b/fs/btrfs/ctree.h
|
|
Qu Wenruo |
689ef1 |
@@ -459,6 +459,25 @@ struct btrfs_block_rsv {
|
|
Qu Wenruo |
689ef1 |
unsigned short full;
|
|
Qu Wenruo |
689ef1 |
unsigned short type;
|
|
Qu Wenruo |
689ef1 |
unsigned short failfast;
|
|
Qu Wenruo |
689ef1 |
+
|
|
Qu Wenruo |
689ef1 |
+ /*
|
|
Qu Wenruo |
689ef1 |
+ * Qgroup equivalent for @size and @reserved
|
|
Qu Wenruo |
689ef1 |
+ *
|
|
Qu Wenruo |
689ef1 |
+ * Unlike normal @size/@reserved for inode rsv, qgroup doesn't care
|
|
Qu Wenruo |
689ef1 |
+ * about things like csum size or how many tree blocks it will need to
|
|
Qu Wenruo |
689ef1 |
+ * reserve.
|
|
Qu Wenruo |
689ef1 |
+ *
|
|
Qu Wenruo |
689ef1 |
+ * Qgroup cares more about net change of the extent usage.
|
|
Qu Wenruo |
689ef1 |
+ *
|
|
Qu Wenruo |
689ef1 |
+ * So for one newly inserted file extent, in worst case it will cause
|
|
Qu Wenruo |
689ef1 |
+ * leaf split and level increase, nodesize for each file extent is
|
|
Qu Wenruo |
689ef1 |
+ * already too much.
|
|
Qu Wenruo |
689ef1 |
+ *
|
|
Qu Wenruo |
689ef1 |
+ * In short, qgroup_rsv_size/reserved is the upper limit of the needed
|
|
Qu Wenruo |
689ef1 |
+ * qgroup metadata reservation.
|
|
Qu Wenruo |
689ef1 |
+ */
|
|
Qu Wenruo |
689ef1 |
+ u64 qgroup_rsv_size;
|
|
Qu Wenruo |
689ef1 |
+ u64 qgroup_rsv_reserved;
|
|
Qu Wenruo |
689ef1 |
};
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
/*
|
|
Qu Wenruo |
689ef1 |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
|
|
Qu Wenruo |
689ef1 |
index 5bdb5636d552..055494ddcace 100644
|
|
Qu Wenruo |
689ef1 |
--- a/fs/btrfs/extent-tree.c
|
|
Qu Wenruo |
689ef1 |
+++ b/fs/btrfs/extent-tree.c
|
|
Qu Wenruo |
689ef1 |
@@ -5560,14 +5560,18 @@ static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
|
|
Qu Wenruo |
689ef1 |
struct btrfs_block_rsv *block_rsv,
|
|
Qu Wenruo |
689ef1 |
- struct btrfs_block_rsv *dest, u64 num_bytes)
|
|
Qu Wenruo |
689ef1 |
+ struct btrfs_block_rsv *dest, u64 num_bytes,
|
|
Qu Wenruo |
689ef1 |
+ u64 *qgroup_to_release_ret)
|
|
Qu Wenruo |
689ef1 |
{
|
|
Qu Wenruo |
689ef1 |
struct btrfs_space_info *space_info = block_rsv->space_info;
|
|
Qu Wenruo |
689ef1 |
+ u64 qgroup_to_release = 0;
|
|
Qu Wenruo |
689ef1 |
u64 ret;
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
spin_lock(&block_rsv->lock);
|
|
Qu Wenruo |
689ef1 |
- if (num_bytes == (u64)-1)
|
|
Qu Wenruo |
689ef1 |
+ if (num_bytes == (u64)-1) {
|
|
Qu Wenruo |
689ef1 |
num_bytes = block_rsv->size;
|
|
Qu Wenruo |
689ef1 |
+ qgroup_to_release = block_rsv->qgroup_rsv_size;
|
|
Qu Wenruo |
689ef1 |
+ }
|
|
Qu Wenruo |
689ef1 |
block_rsv->size -= num_bytes;
|
|
Qu Wenruo |
689ef1 |
if (block_rsv->reserved >= block_rsv->size) {
|
|
Qu Wenruo |
689ef1 |
num_bytes = block_rsv->reserved - block_rsv->size;
|
|
Qu Wenruo |
689ef1 |
@@ -5576,6 +5580,13 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
|
|
Qu Wenruo |
689ef1 |
} else {
|
|
Qu Wenruo |
689ef1 |
num_bytes = 0;
|
|
Qu Wenruo |
689ef1 |
}
|
|
Qu Wenruo |
689ef1 |
+ if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
|
|
Qu Wenruo |
689ef1 |
+ qgroup_to_release = block_rsv->qgroup_rsv_reserved -
|
|
Qu Wenruo |
689ef1 |
+ block_rsv->qgroup_rsv_size;
|
|
Qu Wenruo |
689ef1 |
+ block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
|
|
Qu Wenruo |
689ef1 |
+ } else {
|
|
Qu Wenruo |
689ef1 |
+ qgroup_to_release = 0;
|
|
Qu Wenruo |
689ef1 |
+ }
|
|
Qu Wenruo |
689ef1 |
spin_unlock(&block_rsv->lock);
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
ret = num_bytes;
|
|
Qu Wenruo |
689ef1 |
@@ -5598,6 +5609,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
|
|
Qu Wenruo |
689ef1 |
space_info_add_old_bytes(fs_info, space_info,
|
|
Qu Wenruo |
689ef1 |
num_bytes);
|
|
Qu Wenruo |
689ef1 |
}
|
|
Qu Wenruo |
689ef1 |
+ if (qgroup_to_release_ret)
|
|
Qu Wenruo |
689ef1 |
+ *qgroup_to_release_ret = qgroup_to_release;
|
|
Qu Wenruo |
689ef1 |
return ret;
|
|
Qu Wenruo |
689ef1 |
}
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
@@ -5739,17 +5752,21 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
|
|
Qu Wenruo |
689ef1 |
struct btrfs_root *root = inode->root;
|
|
Qu Wenruo |
689ef1 |
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
|
|
Qu Wenruo |
689ef1 |
u64 num_bytes = 0;
|
|
Qu Wenruo |
689ef1 |
+ u64 qgroup_num_bytes = 0;
|
|
Qu Wenruo |
689ef1 |
int ret = -ENOSPC;
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
spin_lock(&block_rsv->lock);
|
|
Qu Wenruo |
689ef1 |
if (block_rsv->reserved < block_rsv->size)
|
|
Qu Wenruo |
689ef1 |
num_bytes = block_rsv->size - block_rsv->reserved;
|
|
Qu Wenruo |
689ef1 |
+ if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
|
|
Qu Wenruo |
689ef1 |
+ qgroup_num_bytes = block_rsv->qgroup_rsv_size -
|
|
Qu Wenruo |
689ef1 |
+ block_rsv->qgroup_rsv_reserved;
|
|
Qu Wenruo |
689ef1 |
spin_unlock(&block_rsv->lock);
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
if (num_bytes == 0)
|
|
Qu Wenruo |
689ef1 |
return 0;
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
- ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
|
|
Qu Wenruo |
689ef1 |
+ ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true);
|
|
Qu Wenruo |
689ef1 |
if (ret)
|
|
Qu Wenruo |
689ef1 |
return ret;
|
|
Qu Wenruo |
689ef1 |
ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
|
|
Qu Wenruo |
689ef1 |
@@ -5757,7 +5774,13 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
|
|
Qu Wenruo |
689ef1 |
block_rsv_add_bytes(block_rsv, num_bytes, 0);
|
|
Qu Wenruo |
689ef1 |
trace_btrfs_space_reservation(root->fs_info, "delalloc",
|
|
Qu Wenruo |
689ef1 |
btrfs_ino(inode), num_bytes, 1);
|
|
Qu Wenruo |
689ef1 |
- }
|
|
Qu Wenruo |
689ef1 |
+
|
|
Qu Wenruo |
689ef1 |
+ /* Don't forget to increase qgroup_rsv_reserved */
|
|
Qu Wenruo |
689ef1 |
+ spin_lock(&block_rsv->lock);
|
|
Qu Wenruo |
689ef1 |
+ block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
|
|
Qu Wenruo |
689ef1 |
+ spin_unlock(&block_rsv->lock);
|
|
Qu Wenruo |
689ef1 |
+ } else
|
|
Qu Wenruo |
689ef1 |
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
|
|
Qu Wenruo |
689ef1 |
return ret;
|
|
Qu Wenruo |
689ef1 |
}
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
@@ -5778,20 +5801,23 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
|
|
Qu Wenruo |
689ef1 |
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
|
|
Qu Wenruo |
689ef1 |
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
|
|
Qu Wenruo |
689ef1 |
u64 released = 0;
|
|
Qu Wenruo |
689ef1 |
+ u64 qgroup_to_release = 0;
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
/*
|
|
Qu Wenruo |
689ef1 |
* Since we statically set the block_rsv->size we just want to say we
|
|
Qu Wenruo |
689ef1 |
* are releasing 0 bytes, and then we'll just get the reservation over
|
|
Qu Wenruo |
689ef1 |
* the size free'd.
|
|
Qu Wenruo |
689ef1 |
*/
|
|
Qu Wenruo |
689ef1 |
- released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0);
|
|
Qu Wenruo |
689ef1 |
+ released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0,
|
|
Qu Wenruo |
689ef1 |
+ &qgroup_to_release);
|
|
Qu Wenruo |
689ef1 |
if (released > 0)
|
|
Qu Wenruo |
689ef1 |
trace_btrfs_space_reservation(fs_info, "delalloc",
|
|
Qu Wenruo |
689ef1 |
btrfs_ino(inode), released, 0);
|
|
Qu Wenruo |
689ef1 |
if (qgroup_free)
|
|
Qu Wenruo |
689ef1 |
- btrfs_qgroup_free_meta_prealloc(inode->root, released);
|
|
Qu Wenruo |
689ef1 |
+ btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
|
|
Qu Wenruo |
689ef1 |
else
|
|
Qu Wenruo |
689ef1 |
- btrfs_qgroup_convert_reserved_meta(inode->root, released);
|
|
Qu Wenruo |
689ef1 |
+ btrfs_qgroup_convert_reserved_meta(inode->root,
|
|
Qu Wenruo |
689ef1 |
+ qgroup_to_release);
|
|
Qu Wenruo |
689ef1 |
}
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
|
|
Qu Wenruo |
689ef1 |
@@ -5803,7 +5829,7 @@ void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
|
|
Qu Wenruo |
689ef1 |
if (global_rsv == block_rsv ||
|
|
Qu Wenruo |
689ef1 |
block_rsv->space_info != global_rsv->space_info)
|
|
Qu Wenruo |
689ef1 |
global_rsv = NULL;
|
|
Qu Wenruo |
689ef1 |
- block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes);
|
|
Qu Wenruo |
689ef1 |
+ block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL);
|
|
Qu Wenruo |
689ef1 |
}
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
|
|
Qu Wenruo |
689ef1 |
@@ -5883,7 +5909,7 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
|
|
Qu Wenruo |
689ef1 |
static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
|
|
Qu Wenruo |
689ef1 |
{
|
|
Qu Wenruo |
689ef1 |
block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
|
|
Qu Wenruo |
689ef1 |
- (u64)-1);
|
|
Qu Wenruo |
689ef1 |
+ (u64)-1, NULL);
|
|
Qu Wenruo |
689ef1 |
WARN_ON(fs_info->trans_block_rsv.size > 0);
|
|
Qu Wenruo |
689ef1 |
WARN_ON(fs_info->trans_block_rsv.reserved > 0);
|
|
Qu Wenruo |
689ef1 |
WARN_ON(fs_info->chunk_block_rsv.size > 0);
|
|
Qu Wenruo |
689ef1 |
@@ -5907,7 +5933,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
|
|
Qu Wenruo |
689ef1 |
WARN_ON_ONCE(!list_empty(&trans->new_bgs));
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
|
|
Qu Wenruo |
689ef1 |
- trans->chunk_bytes_reserved);
|
|
Qu Wenruo |
689ef1 |
+ trans->chunk_bytes_reserved, NULL);
|
|
Qu Wenruo |
689ef1 |
trans->chunk_bytes_reserved = 0;
|
|
Qu Wenruo |
689ef1 |
}
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
@@ -6012,6 +6038,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
|
|
Qu Wenruo |
689ef1 |
{
|
|
Qu Wenruo |
689ef1 |
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
|
|
Qu Wenruo |
689ef1 |
u64 reserve_size = 0;
|
|
Qu Wenruo |
689ef1 |
+ u64 qgroup_rsv_size = 0;
|
|
Qu Wenruo |
689ef1 |
u64 csum_leaves;
|
|
Qu Wenruo |
689ef1 |
unsigned outstanding_extents;
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
@@ -6024,9 +6051,17 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
|
|
Qu Wenruo |
689ef1 |
inode->csum_bytes);
|
|
Qu Wenruo |
689ef1 |
reserve_size += btrfs_calc_trans_metadata_size(fs_info,
|
|
Qu Wenruo |
689ef1 |
csum_leaves);
|
|
Qu Wenruo |
689ef1 |
+ /*
|
|
Qu Wenruo |
689ef1 |
+ * For qgroup rsv, the calculation is very simple:
|
|
Qu Wenruo |
689ef1 |
+ * account one nodesize for each outstanding extent
|
|
Qu Wenruo |
689ef1 |
+ *
|
|
Qu Wenruo |
689ef1 |
+ * This is overestimating in most cases.
|
|
Qu Wenruo |
689ef1 |
+ */
|
|
Qu Wenruo |
689ef1 |
+ qgroup_rsv_size = outstanding_extents * fs_info->nodesize;
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
spin_lock(&block_rsv->lock);
|
|
Qu Wenruo |
689ef1 |
block_rsv->size = reserve_size;
|
|
Qu Wenruo |
689ef1 |
+ block_rsv->qgroup_rsv_size = qgroup_rsv_size;
|
|
Qu Wenruo |
689ef1 |
spin_unlock(&block_rsv->lock);
|
|
Qu Wenruo |
689ef1 |
}
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
@@ -8405,7 +8440,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
|
|
Qu Wenruo |
689ef1 |
struct btrfs_block_rsv *block_rsv, u32 blocksize)
|
|
Qu Wenruo |
689ef1 |
{
|
|
Qu Wenruo |
689ef1 |
block_rsv_add_bytes(block_rsv, blocksize, 0);
|
|
Qu Wenruo |
689ef1 |
- block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
|
|
Qu Wenruo |
689ef1 |
+ block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
|
|
Qu Wenruo |
689ef1 |
}
|
|
Qu Wenruo |
689ef1 |
|
|
Qu Wenruo |
689ef1 |
/*
|
|
Qu Wenruo |
689ef1 |
--
|
|
Qu Wenruo |
689ef1 |
2.19.0
|
|
Qu Wenruo |
689ef1 |
|