From: Qu Wenruo <quwenruo@cn.fujitsu.com>
Subject: REVERT: btrfs: qgroup: Move half of the qgroup accounting time out of commit
trans
Patch-mainline: No, reverted patch
References: bsc#1083684
This patch reverts the following commit, which introduces tree locking
deadlocks when resolving references.
X-Git-commit: fb235dc06fac9eaa4408ade9c8b20d45d63c89b7
Just as Filipe pointed out, the most time consuming parts of qgroup are
btrfs_qgroup_account_extents() and
btrfs_qgroup_prepare_account_extents().
Which both call btrfs_find_all_roots() to get old_roots and new_roots
ulist.
What makes things worse is that we're calling the expensive
btrfs_find_all_roots() at transaction commit time with
TRANS_STATE_COMMIT_DOING, which blocks all incoming transactions.
Such behavior is necessary for the @new_roots search: the current
btrfs_find_all_roots() can't do it correctly any earlier, so we call it
just before switching commit roots.
However, for the @old_roots search this is not necessary: the search is
based on commit_root, so it is always correct and can be moved out of
the transaction commit.
This patch moves the @old_roots search part out of
commit_transaction(), so in theory we can halve the qgroup time
consumption in commit_transaction().
But please note that this won't speed up qgroup overall; the total time
consumption is still the same, it just reduces the performance stall.
Cc: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
---
fs/btrfs/delayed-ref.c | 20 +++-----------------
fs/btrfs/qgroup.c | 30 +++---------------------------
fs/btrfs/qgroup.h | 33 +++------------------------------
3 files changed, 9 insertions(+), 74 deletions(-)
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -558,7 +558,6 @@ add_delayed_ref_head(struct btrfs_fs_inf
struct btrfs_qgroup_extent_record *qrecord,
u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
int action, int is_data, int is_system,
- int *qrecord_inserted_ret,
int *old_ref_mod, int *new_ref_mod)
{
@@ -567,7 +566,6 @@ add_delayed_ref_head(struct btrfs_fs_inf
struct btrfs_delayed_ref_root *delayed_refs;
int count_mod = 1;
int must_insert_reserved = 0;
- int qrecord_inserted = 0;
/* If reserved is provided, it must be a data extent. */
BUG_ON(!is_data && reserved);
@@ -635,8 +633,6 @@ add_delayed_ref_head(struct btrfs_fs_inf
if(btrfs_qgroup_trace_extent_nolock(fs_info,
delayed_refs, qrecord))
kfree(qrecord);
- else
- qrecord_inserted = 1;
}
spin_lock_init(&head_ref->lock);
@@ -667,8 +663,6 @@ add_delayed_ref_head(struct btrfs_fs_inf
atomic_inc(&delayed_refs->num_entries);
trans->delayed_ref_updates++;
}
- if (qrecord_inserted_ret)
- *qrecord_inserted_ret = qrecord_inserted;
if (new_ref_mod)
*new_ref_mod = head_ref->total_ref_mod;
return head_ref;
@@ -801,7 +795,6 @@ int btrfs_add_delayed_tree_ref(struct bt
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
- int qrecord_inserted;
int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
BUG_ON(extent_op && extent_op->is_data);
@@ -831,15 +824,12 @@ int btrfs_add_delayed_tree_ref(struct bt
*/
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
bytenr, num_bytes, 0, 0, action, 0,
- is_system, &qrecord_inserted,
- old_ref_mod, new_ref_mod);
+ is_system, old_ref_mod, new_ref_mod);
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action);
spin_unlock(&delayed_refs->lock);
- if (qrecord_inserted)
- return btrfs_qgroup_trace_extent_post(fs_info, record);
return 0;
free_head_ref:
@@ -864,7 +854,6 @@ int btrfs_add_delayed_data_ref(struct bt
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
- int qrecord_inserted;
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
@@ -898,16 +887,13 @@ int btrfs_add_delayed_data_ref(struct bt
*/
head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
bytenr, num_bytes, ref_root, reserved,
- action, 1, 0, &qrecord_inserted,
- old_ref_mod, new_ref_mod);
+ action, 1, 0, old_ref_mod, new_ref_mod);
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action);
spin_unlock(&delayed_refs->lock);
- if (qrecord_inserted)
- return btrfs_qgroup_trace_extent_post(fs_info, record);
return 0;
}
@@ -935,7 +921,7 @@ int btrfs_add_delayed_extent_op(struct b
*/
add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
- extent_op->is_data, 0, NULL, NULL, NULL);
+ extent_op->is_data, 0, NULL, NULL);
spin_unlock(&delayed_refs->lock);
return 0;
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1501,28 +1501,6 @@ int btrfs_qgroup_trace_extent_nolock(str
return 0;
}
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_extent_record *qrecord)
-{
- struct ulist *old_root;
- u64 bytenr = qrecord->bytenr;
- int ret;
-
- ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root);
- if (ret < 0)
- return ret;
-
- /*
- * Here we don't need to get the lock of
- * trans->transaction->delayed_refs, since inserted qrecord won't
- * be deleted, only qrecord->node may be modified (new qrecord insert)
- *
- * So modifying qrecord->old_roots is safe here
- */
- qrecord->old_roots = old_root;
- return 0;
-}
-
int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
gfp_t gfp_flag)
@@ -1548,11 +1526,9 @@ int btrfs_qgroup_trace_extent(struct btr
spin_lock(&delayed_refs->lock);
ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
spin_unlock(&delayed_refs->lock);
- if (ret > 0) {
+ if (ret > 0)
kfree(record);
- return 0;
- }
- return btrfs_qgroup_trace_extent_post(fs_info, record);
+ return 0;
}
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
@@ -2097,7 +2073,7 @@ int btrfs_qgroup_account_extents(struct
* Old roots should be searched when inserting qgroup
* extent record
*/
- if (WARN_ON(!record->old_roots)) {
+ if (!record->old_roots) {
/* Search commit root to find old_roots */
ret = btrfs_find_all_roots(NULL, fs_info,
record->bytenr, 0,
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -179,10 +179,9 @@ struct btrfs_delayed_extent_op;
/*
* Inform qgroup to trace one dirty extent, its info is recorded in @record.
- * So qgroup can account it at transaction committing time.
+ * So qgroup can account it at commit trans time.
*
- * No lock version, caller must acquire delayed ref lock and allocated memory,
- * then call btrfs_qgroup_trace_extent_post() after exiting lock context.
+ * No lock version, caller must acquire delayed ref lock and allocate memory.
*
* Return 0 for success insert
* Return >0 for existing record, caller can free @record safely.
@@ -194,37 +193,11 @@ int btrfs_qgroup_trace_extent_nolock(
struct btrfs_qgroup_extent_record *record);
/*
- * Post handler after qgroup_trace_extent_nolock().
- *
- * NOTE: Current qgroup does the expensive backref walk at transaction
- * committing time with TRANS_STATE_COMMIT_DOING, this blocks incoming
- * new transaction.
- * This is designed to allow btrfs_find_all_roots() to get correct new_roots
- * result.
- *
- * However for old_roots there is no need to do backref walk at that time,
- * since we search commit roots to walk backref and result will always be
- * correct.
- *
- * Due to the nature of no lock version, we can't do backref there.
- * So we must call btrfs_qgroup_trace_extent_post() after exiting
- * spinlock context.
- *
- * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result
- * using current root, then we can move all expensive backref walk out of
- * transaction committing, but not now as qgroup accounting will be wrong again.
- */
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_extent_record *qrecord);
-
-/*
* Inform qgroup to trace one dirty extent, specified by @bytenr and
* @num_bytes.
* So qgroup can account it at commit trans time.
*
- * Better encapsulated version, with memory allocation and backref walk for
- * commit roots.
- * So this can sleep.
+ * Better encapsulated version.
*
* Return 0 if the operation is done.
* Return <0 for error, like memory allocation failure or invalid parameter