From: Qu Wenruo <quwenruo@cn.fujitsu.com>
Subject: REVERT: btrfs: qgroup: Move half of the qgroup accounting time out of commit
trans
Patch-mainline: No, reverted patch
References: bsc#1083684
This patch reverts the following commit. It introduces tree locking
deadlocks when resolving references.
X-Git-commit: fb235dc06fac9eaa4408ade9c8b20d45d63c89b7
Just as Filipe pointed out, the most time consuming parts of qgroup are
btrfs_qgroup_account_extents() and
btrfs_qgroup_prepare_account_extents().
Which both call btrfs_find_all_roots() to get old_roots and new_roots
ulist.
What makes things worse is, we're calling that expensive
btrfs_find_all_roots() at transaction committing time with
TRANS_STATE_COMMIT_DOING, which blocks all incoming transactions.
Such behavior is necessary for the @new_roots search, as the current
btrfs_find_all_roots() can't do it correctly, so we call it just
before switching commit roots.
However for @old_roots search, it's not necessary as such search is
based on commit_root, so it will always be correct and we can move it
out of transaction committing.
This patch moves the @old_roots search part out of
commit_transaction(), so in theory we can halve the qgroup time
consumption at commit_transaction().
But please note that, this won't speedup qgroup overall, the total time
consumption is still the same, just reduce the performance stall.
Cc: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
---
fs/btrfs/delayed-ref.c | 19 ++++---------------
fs/btrfs/qgroup.c | 30 +++---------------------------
fs/btrfs/qgroup.h | 33 +++------------------------------
3 files changed, 10 insertions(+), 72 deletions(-)
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -821,11 +821,10 @@ add_delayed_ref_head(struct btrfs_fs_inf
struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_qgroup_extent_record *qrecord,
- int action, int *qrecord_inserted_ret)
+ int action)
{
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_root *delayed_refs;
- int qrecord_inserted = 0;
delayed_refs = &trans->transaction->delayed_refs;
@@ -834,8 +833,6 @@ add_delayed_ref_head(struct btrfs_fs_inf
if (btrfs_qgroup_trace_extent_nolock(fs_info,
delayed_refs, qrecord))
kfree(qrecord);
- else
- qrecord_inserted = 1;
}
trace_add_delayed_ref_head(fs_info, head_ref, action);
@@ -867,8 +864,6 @@ add_delayed_ref_head(struct btrfs_fs_inf
atomic_inc(&delayed_refs->num_entries);
trans->delayed_ref_updates++;
}
- if (qrecord_inserted_ret)
- *qrecord_inserted_ret = qrecord_inserted;
return head_ref;
}
@@ -938,7 +933,6 @@ int btrfs_add_delayed_tree_ref(struct bt
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
- int qrecord_inserted;
bool is_system;
int action = generic_ref->action;
int level = generic_ref->tree_ref.level;
@@ -992,7 +986,7 @@ int btrfs_add_delayed_tree_ref(struct bt
* the spin lock
*/
head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
- action, &qrecord_inserted);
+ action);
ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
spin_unlock(&delayed_refs->lock);
@@ -1009,8 +1003,6 @@ int btrfs_add_delayed_tree_ref(struct bt
if (ret > 0)
kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
- if (qrecord_inserted)
- return btrfs_qgroup_trace_extent_post(fs_info, record);
return 0;
free_head_ref:
@@ -1032,7 +1024,6 @@ int btrfs_add_delayed_data_ref(struct bt
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
- int qrecord_inserted;
int action = generic_ref->action;
int ret;
u64 bytenr = generic_ref->bytenr;
@@ -1091,7 +1082,7 @@ int btrfs_add_delayed_data_ref(struct bt
* the spin lock
*/
head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
- action, &qrecord_inserted);
+ action);
ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
spin_unlock(&delayed_refs->lock);
@@ -1108,8 +1099,6 @@ int btrfs_add_delayed_data_ref(struct bt
if (ret > 0)
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
- if (qrecord_inserted)
- return btrfs_qgroup_trace_extent_post(fs_info, record);
return 0;
}
@@ -1134,7 +1123,7 @@ int btrfs_add_delayed_extent_op(struct b
spin_lock(&delayed_refs->lock);
add_delayed_ref_head(fs_info, trans, head_ref, NULL,
- BTRFS_UPDATE_DELAYED_HEAD, NULL);
+ BTRFS_UPDATE_DELAYED_HEAD);
spin_unlock(&delayed_refs->lock);
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1506,28 +1506,6 @@ int btrfs_qgroup_trace_extent_nolock(str
return 0;
}
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_extent_record *qrecord)
-{
- struct ulist *old_root;
- u64 bytenr = qrecord->bytenr;
- int ret;
-
- ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
- if (ret < 0)
- return ret;
-
- /*
- * Here we don't need to get the lock of
- * trans->transaction->delayed_refs, since inserted qrecord won't
- * be deleted, only qrecord->node may be modified (new qrecord insert)
- *
- * So modifying qrecord->old_roots is safe here
- */
- qrecord->old_roots = old_root;
- return 0;
-}
-
int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
gfp_t gfp_flag)
@@ -1553,11 +1531,9 @@ int btrfs_qgroup_trace_extent(struct btr
spin_lock(&delayed_refs->lock);
ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
spin_unlock(&delayed_refs->lock);
- if (ret > 0) {
+ if (ret > 0)
kfree(record);
- return 0;
- }
- return btrfs_qgroup_trace_extent_post(fs_info, record);
+ return 0;
}
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
@@ -2456,7 +2432,7 @@ int btrfs_qgroup_account_extents(struct
* Old roots should be searched when inserting qgroup
* extent record
*/
- if (WARN_ON(!record->old_roots)) {
+ if (!record->old_roots) {
/* Search commit root to find old_roots */
ret = btrfs_find_all_roots(NULL, fs_info,
record->bytenr, 0,
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -270,10 +270,9 @@ struct btrfs_delayed_extent_op;
/*
* Inform qgroup to trace one dirty extent, its info is recorded in @record.
- * So qgroup can account it at transaction committing time.
+ * So qgroup can account it at commit trans time.
*
- * No lock version, caller must acquire delayed ref lock and allocated memory,
- * then call btrfs_qgroup_trace_extent_post() after exiting lock context.
+ * No lock version, caller must acquire delayed ref lock and allocate memory.
*
* Return 0 for success insert
* Return >0 for existing record, caller can free @record safely.
@@ -285,37 +284,11 @@ int btrfs_qgroup_trace_extent_nolock(
struct btrfs_qgroup_extent_record *record);
/*
- * Post handler after qgroup_trace_extent_nolock().
- *
- * NOTE: Current qgroup does the expensive backref walk at transaction
- * committing time with TRANS_STATE_COMMIT_DOING, this blocks incoming
- * new transaction.
- * This is designed to allow btrfs_find_all_roots() to get correct new_roots
- * result.
- *
- * However for old_roots there is no need to do backref walk at that time,
- * since we search commit roots to walk backref and result will always be
- * correct.
- *
- * Due to the nature of no lock version, we can't do backref there.
- * So we must call btrfs_qgroup_trace_extent_post() after exiting
- * spinlock context.
- *
- * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result
- * using current root, then we can move all expensive backref walk out of
- * transaction committing, but not now as qgroup accounting will be wrong again.
- */
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup_extent_record *qrecord);
-
-/*
* Inform qgroup to trace one dirty extent, specified by @bytenr and
* @num_bytes.
* So qgroup can account it at commit trans time.
*
- * Better encapsulated version, with memory allocation and backref walk for
- * commit roots.
- * So this can sleep.
+ * Better encapsulated version.
*
* Return 0 if the operation is done.
* Return <0 for error, like memory allocation failure or invalid parameter