From: Qu Wenruo <quwenruo@cn.fujitsu.com>
Subject: REVERT: btrfs: qgroup: Move half of the qgroup accounting time out of commit
 trans
Patch-mainline: No, reverted patch
References: bsc#1083684

This patch reverts the following commit, which introduces tree locking
deadlocks when resolving references.  A condensed sketch of the reverted
flow follows the quoted commit message below.

    X-Git-commit: fb235dc06fac9eaa4408ade9c8b20d45d63c89b7
    As Filipe pointed out, the most time-consuming parts of qgroup are
    btrfs_qgroup_account_extents() and
    btrfs_qgroup_prepare_account_extents(), both of which call
    btrfs_find_all_roots() to get the old_roots and new_roots ulists.

    What makes things worse is that we're calling the expensive
    btrfs_find_all_roots() at transaction commit time with
    TRANS_STATE_COMMIT_DOING, which blocks all incoming transactions.

    Such behavior is necessary for the @new_roots search, as the current
    btrfs_find_all_roots() can't do it correctly, so we call it just
    before switching commit roots.

    However, for the @old_roots search it's not necessary, as that search
    is based on the commit_root, so it will always be correct and can be
    moved out of transaction commit.

    This patch moves the @old_roots search out of commit_transaction(),
    so in theory we can halve the qgroup time consumption at
    commit_transaction().

    But please note that this won't speed up qgroup overall; the total
    time consumption is still the same, it just reduces the performance
    stall.

    Cc: Filipe Manana <fdmanana@suse.com>
    Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
    Reviewed-by: Filipe Manana <fdmanana@suse.com>
    Signed-off-by: David Sterba <dsterba@suse.com>
Acked-by: Jeff Mahoney <jeffm@suse.com>
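
For reference, here is a condensed sketch of the flow being reverted,
reconstructed from the qgroup.c hunk below (unrelated code and error
handling abridged; not a drop-in excerpt).  The reverted commit resolved
@old_roots right after inserting the qgroup extent record, outside the
delayed-refs spinlock but still inside the running transaction, and that
backref walk takes the tree locks implicated in the deadlock:

  int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
  		struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
  		gfp_t gfp_flag)
  {
  	...
  	spin_lock(&delayed_refs->lock);
  	ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
  	spin_unlock(&delayed_refs->lock);
  	if (ret > 0) {
  		/* An identical record already exists, free ours. */
  		kfree(record);
  		return 0;
  	}
  	/*
  	 * Reverted behavior: walk backrefs against the commit root to
  	 * fill record->old_roots now rather than at commit time.
  	 */
  	return btrfs_qgroup_trace_extent_post(fs_info, record);
  }
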
---
 fs/btrfs/delayed-ref.c |   22 ++++------------------
 fs/btrfs/qgroup.c      |   30 +++---------------------------
 fs/btrfs/qgroup.h      |   33 +++------------------------------
 3 files changed, 10 insertions(+), 75 deletions(-)
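
After the revert, the @old_roots lookup happens entirely at commit time
inside btrfs_qgroup_account_extents(): the fallback branch in the last
qgroup.c hunk, whose WARN_ON is dropped, becomes the normal path.  Per
extent record it is roughly the following, with the ulist destination
argument assumed by analogy with the removed
btrfs_qgroup_trace_extent_post():

  if (!record->old_roots) {
  	/* Search commit root to find old_roots */
  	ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0,
  				   &record->old_roots);
  	/* (error handling as in the surrounding loop) */
  }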

--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -636,13 +636,11 @@ add_delayed_ref_head(struct btrfs_fs_inf
 		     struct btrfs_trans_handle *trans,
 		     struct btrfs_delayed_ref_head *head_ref,
 		     struct btrfs_qgroup_extent_record *qrecord,
-		     int action, int *qrecord_inserted_ret,
-		     int *old_ref_mod, int *new_ref_mod)
+		     int action, int *old_ref_mod, int *new_ref_mod)
 
 {
 	struct btrfs_delayed_ref_head *existing;
 	struct btrfs_delayed_ref_root *delayed_refs;
-	int qrecord_inserted = 0;
 
 	delayed_refs = &trans->transaction->delayed_refs;
 
@@ -651,8 +649,6 @@ add_delayed_ref_head(struct btrfs_fs_inf
 		if (btrfs_qgroup_trace_extent_nolock(fs_info,
 					delayed_refs, qrecord))
 			kfree(qrecord);
-		else
-			qrecord_inserted = 1;
 	}
 
 	trace_add_delayed_ref_head(fs_info, head_ref, action);
@@ -682,8 +678,6 @@ add_delayed_ref_head(struct btrfs_fs_inf
 		atomic_inc(&delayed_refs->num_entries);
 		trans->delayed_ref_updates++;
 	}
-	if (qrecord_inserted_ret)
-		*qrecord_inserted_ret = qrecord_inserted;
 	if (new_ref_mod)
 		*new_ref_mod = head_ref->total_ref_mod;
 
@@ -756,7 +750,6 @@ int btrfs_add_delayed_tree_ref(struct bt
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_qgroup_extent_record *record = NULL;
-	int qrecord_inserted;
 	bool is_system;
 	int action = generic_ref->action;
 	int level = generic_ref->tree_ref.level;
@@ -810,8 +803,7 @@ int btrfs_add_delayed_tree_ref(struct bt
 	 * the spin lock
 	 */
 	head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
-					action, &qrecord_inserted,
-					old_ref_mod, new_ref_mod);
+					action, old_ref_mod, new_ref_mod);
 
 
 	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
@@ -829,8 +821,6 @@ int btrfs_add_delayed_tree_ref(struct bt
 	if (ret > 0)
 		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
 
-	if (qrecord_inserted)
-		return btrfs_qgroup_trace_extent_post(fs_info, record);
 	return 0;
 
 free_head_ref:
@@ -853,7 +843,6 @@ int btrfs_add_delayed_data_ref(struct bt
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_qgroup_extent_record *record = NULL;
-	int qrecord_inserted;
 	int action = generic_ref->action;
 	int ret;
 	u64 bytenr = generic_ref->bytenr;
@@ -912,8 +901,7 @@ int btrfs_add_delayed_data_ref(struct bt
 	 * the spin lock
 	 */
 	head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
-					action, &qrecord_inserted,
-					old_ref_mod, new_ref_mod);
+					action, old_ref_mod, new_ref_mod);
 
 	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
 	spin_unlock(&delayed_refs->lock);
@@ -930,8 +918,6 @@ int btrfs_add_delayed_data_ref(struct bt
 	if (ret > 0)
 		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
 
-	if (qrecord_inserted)
-		return btrfs_qgroup_trace_extent_post(fs_info, record);
 	return 0;
 }
 
@@ -957,7 +943,7 @@ int btrfs_add_delayed_extent_op(struct b
 
 	add_delayed_ref_head(fs_info, trans, head_ref, NULL,
 			     BTRFS_UPDATE_DELAYED_HEAD,
-			     NULL, NULL, NULL);
+			     NULL, NULL);
 
 	spin_unlock(&delayed_refs->lock);
 
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1507,28 +1507,6 @@ int btrfs_qgroup_trace_extent_nolock(str
 	return 0;
 }
 
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
-				   struct btrfs_qgroup_extent_record *qrecord)
-{
-	struct ulist *old_root;
-	u64 bytenr = qrecord->bytenr;
-	int ret;
-
-	ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root);
-	if (ret < 0)
-		return ret;
-
-	/*
-	 * Here we don't need to get the lock of
-	 * trans->transaction->delayed_refs, since inserted qrecord won't
-	 * be deleted, only qrecord->node may be modified (new qrecord insert)
-	 *
-	 * So modifying qrecord->old_roots is safe here
-	 */
-	qrecord->old_roots = old_root;
-	return 0;
-}
-
 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
 		struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
 		gfp_t gfp_flag)
@@ -1554,11 +1532,9 @@ int btrfs_qgroup_trace_extent(struct btr
 	spin_lock(&delayed_refs->lock);
 	ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
 	spin_unlock(&delayed_refs->lock);
-	if (ret > 0) {
+	if (ret > 0)
 		kfree(record);
-		return 0;
-	}
-	return btrfs_qgroup_trace_extent_post(fs_info, record);
+	return 0;
 }
 
 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
@@ -2457,7 +2433,7 @@ int btrfs_qgroup_account_extents(struct
 			 * Old roots should be searched when inserting qgroup
 			 * extent record
 			 */
-			if (WARN_ON(!record->old_roots)) {
+			if (!record->old_roots) {
 				/* Search commit root to find old_roots */
 				ret = btrfs_find_all_roots(NULL, fs_info,
 						record->bytenr, 0,
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -270,10 +270,9 @@ struct btrfs_delayed_extent_op;
 
 /*
  * Inform qgroup to trace one dirty extent, its info is recorded in @record.
- * So qgroup can account it at transaction committing time.
+ * So qgroup can account it at commit trans time.
  *
- * No lock version, caller must acquire delayed ref lock and allocated memory,
- * then call btrfs_qgroup_trace_extent_post() after exiting lock context.
+ * No lock version, caller must acquire delayed ref lock and allocate memory.
  *
  * Return 0 for success insert
  * Return >0 for existing record, caller can free @record safely.
@@ -285,37 +284,11 @@ int btrfs_qgroup_trace_extent_nolock(
 		struct btrfs_qgroup_extent_record *record);
 
 /*
- * Post handler after qgroup_trace_extent_nolock().
- *
- * NOTE: Current qgroup does the expensive backref walk at transaction
- * committing time with TRANS_STATE_COMMIT_DOING, this blocks incoming
- * new transaction.
- * This is designed to allow btrfs_find_all_roots() to get correct new_roots
- * result.
- *
- * However for old_roots there is no need to do backref walk at that time,
- * since we search commit roots to walk backref and result will always be
- * correct.
- *
- * Due to the nature of no lock version, we can't do backref there.
- * So we must call btrfs_qgroup_trace_extent_post() after exiting
- * spinlock context.
- *
- * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result
- * using current root, then we can move all expensive backref walk out of
- * transaction committing, but not now as qgroup accounting will be wrong again.
- */
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
-				   struct btrfs_qgroup_extent_record *qrecord);
-
-/*
  * Inform qgroup to trace one dirty extent, specified by @bytenr and
  * @num_bytes.
  * So qgroup can account it at commit trans time.
  *
- * Better encapsulated version, with memory allocation and backref walk for
- * commit roots.
- * So this can sleep.
+ * Better encapsulated version.
  *
  * Return 0 if the operation is done.
  * Return <0 for error, like memory allocation failure or invalid parameter