From: Qu Wenruo <quwenruo@cn.fujitsu.com>
Subject: REVERT: btrfs: qgroup: Move half of the qgroup accounting time out of commit
 trans
Patch-mainline: No, reverted patch
References: bsc#1083684

This patch reverts the following commit, which introduces tree locking
deadlocks when resolving references.
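
For reference, the call path removed by this revert looked roughly like
this (a sketch reconstructed from the diff below; that the backref walk
takes tree locks while callers may still hold locks on the trees being
walked is the presumed deadlock scenario):

    btrfs_add_delayed_tree_ref()            /* same for the data ref variant */
      spin_lock(&delayed_refs->lock);
      add_delayed_ref_head()
        btrfs_qgroup_trace_extent_nolock()  /* queues the qrecord */
      spin_unlock(&delayed_refs->lock);
      btrfs_qgroup_trace_extent_post()
        btrfs_find_all_roots()              /* backref walk, takes tree locks */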

    X-Git-commit: fb235dc06fac9eaa4408ade9c8b20d45d63c89b7
    Just as Filipe pointed out, the most time consuming parts of qgroup
    are btrfs_qgroup_account_extents() and
    btrfs_qgroup_prepare_account_extents(), which both call
    btrfs_find_all_roots() to get the old_roots and new_roots ulists.

    What makes things worse is that we're calling the expensive
    btrfs_find_all_roots() at transaction commit time with
    TRANS_STATE_COMMIT_DOING set, which blocks all incoming transactions.

    Such behavior is necessary for the @new_roots search, as the current
    btrfs_find_all_roots() can't do it correctly, so we call it just
    before switching the commit roots.

    However, for the @old_roots search it's not necessary: that search
    is based on the commit root, so it is always correct and we can move
    it out of transaction commit.

    This patch moves the @old_roots search out of commit_transaction(),
    so in theory we can halve the qgroup time consumption at
    commit_transaction().

    But please note that this won't speed up qgroup overall; the total
    time consumption is still the same, it just reduces the commit-time
    stall.

    Cc: Filipe Manana <fdmanana@suse.com>
    Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
    Reviewed-by: Filipe Manana <fdmanana@suse.com>
    Signed-off-by: David Sterba <dsterba@suse.com>
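
After the revert, the @old_roots search happens only at commit time
again, inside btrfs_qgroup_account_extents(). The qgroup.c hunk below
turns the WARN_ON(!record->old_roots) into a plain NULL check, making
the commit-root walk the normal path again, roughly (a sketch based on
the hunk, not a verbatim excerpt):

    btrfs_qgroup_account_extents()      /* under TRANS_STATE_COMMIT_DOING */
      if (!record->old_roots)
        btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0,
                             &record->old_roots);
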
Acked-by: Jeff Mahoney <jeffm@suse.com>
---
 fs/btrfs/delayed-ref.c |   20 +++-----------------
 fs/btrfs/qgroup.c      |   30 +++---------------------------
 fs/btrfs/qgroup.h      |   33 +++------------------------------
 3 files changed, 9 insertions(+), 74 deletions(-)

--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -558,7 +558,6 @@ add_delayed_ref_head(struct btrfs_fs_inf
 		     struct btrfs_qgroup_extent_record *qrecord,
 		     u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
 		     int action, int is_data, int is_system,
-		     int *qrecord_inserted_ret,
 		     int *old_ref_mod, int *new_ref_mod)
 
 {
@@ -567,7 +566,6 @@ add_delayed_ref_head(struct btrfs_fs_inf
 	struct btrfs_delayed_ref_root *delayed_refs;
 	int count_mod = 1;
 	int must_insert_reserved = 0;
-	int qrecord_inserted = 0;
 
 	/* If reserved is provided, it must be a data extent. */
 	BUG_ON(!is_data && reserved);
@@ -635,8 +633,6 @@ add_delayed_ref_head(struct btrfs_fs_inf
 		if(btrfs_qgroup_trace_extent_nolock(fs_info,
 					delayed_refs, qrecord))
 			kfree(qrecord);
-		else
-			qrecord_inserted = 1;
 	}
 
 	spin_lock_init(&head_ref->lock);
@@ -667,8 +663,6 @@ add_delayed_ref_head(struct btrfs_fs_inf
 		atomic_inc(&delayed_refs->num_entries);
 		trans->delayed_ref_updates++;
 	}
-	if (qrecord_inserted_ret)
-		*qrecord_inserted_ret = qrecord_inserted;
 	if (new_ref_mod)
 		*new_ref_mod = head_ref->total_ref_mod;
 	return head_ref;
@@ -801,7 +795,6 @@ int btrfs_add_delayed_tree_ref(struct bt
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_qgroup_extent_record *record = NULL;
-	int qrecord_inserted;
 	int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
 
 	BUG_ON(extent_op && extent_op->is_data);
@@ -831,15 +824,12 @@ int btrfs_add_delayed_tree_ref(struct bt
 	 */
 	head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
 					bytenr, num_bytes, 0, 0, action, 0,
-					is_system, &qrecord_inserted,
-					old_ref_mod, new_ref_mod);
+					is_system, old_ref_mod, new_ref_mod);
 
 	add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
 			     num_bytes, parent, ref_root, level, action);
 	spin_unlock(&delayed_refs->lock);
 
-	if (qrecord_inserted)
-		return btrfs_qgroup_trace_extent_post(fs_info, record);
 	return 0;
 
 free_head_ref:
@@ -864,7 +854,6 @@ int btrfs_add_delayed_data_ref(struct bt
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_qgroup_extent_record *record = NULL;
-	int qrecord_inserted;
 
 	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
 	if (!ref)
@@ -898,16 +887,13 @@ int btrfs_add_delayed_data_ref(struct bt
 	 */
 	head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
 					bytenr, num_bytes, ref_root, reserved,
-					action, 1, 0, &qrecord_inserted,
-					old_ref_mod, new_ref_mod);
+					action, 1, 0, old_ref_mod, new_ref_mod);
 
 	add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
 				   num_bytes, parent, ref_root, owner, offset,
 				   action);
 	spin_unlock(&delayed_refs->lock);
 
-	if (qrecord_inserted)
-		return btrfs_qgroup_trace_extent_post(fs_info, record);
 	return 0;
 }
 
@@ -935,7 +921,7 @@ int btrfs_add_delayed_extent_op(struct b
 	 */
 	add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
 			     num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
-			     extent_op->is_data, 0,  NULL, NULL, NULL);
+			     extent_op->is_data, 0, NULL, NULL);
 
 	spin_unlock(&delayed_refs->lock);
 	return 0;
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1501,28 +1501,6 @@ int btrfs_qgroup_trace_extent_nolock(str
 	return 0;
 }
 
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
-				   struct btrfs_qgroup_extent_record *qrecord)
-{
-	struct ulist *old_root;
-	u64 bytenr = qrecord->bytenr;
-	int ret;
-
-	ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root);
-	if (ret < 0)
-		return ret;
-
-	/*
-	 * Here we don't need to get the lock of
-	 * trans->transaction->delayed_refs, since inserted qrecord won't
-	 * be deleted, only qrecord->node may be modified (new qrecord insert)
-	 *
-	 * So modifying qrecord->old_roots is safe here
-	 */
-	qrecord->old_roots = old_root;
-	return 0;
-}
-
 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
 		struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
 		gfp_t gfp_flag)
@@ -1548,11 +1526,9 @@ int btrfs_qgroup_trace_extent(struct btr
 	spin_lock(&delayed_refs->lock);
 	ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
 	spin_unlock(&delayed_refs->lock);
-	if (ret > 0) {
+	if (ret > 0)
 		kfree(record);
-		return 0;
-	}
-	return btrfs_qgroup_trace_extent_post(fs_info, record);
+	return 0;
 }
 
 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
@@ -2097,7 +2073,7 @@ int btrfs_qgroup_account_extents(struct
 			 * Old roots should be searched when inserting qgroup
 			 * extent record
 			 */
-			if (WARN_ON(!record->old_roots)) {
+			if (!record->old_roots) {
 				/* Search commit root to find old_roots */
 				ret = btrfs_find_all_roots(NULL, fs_info,
 						record->bytenr, 0,
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -179,10 +179,9 @@ struct btrfs_delayed_extent_op;
 
 /*
  * Inform qgroup to trace one dirty extent, its info is recorded in @record.
- * So qgroup can account it at transaction committing time.
+ * So qgroup can account it at commit trans time.
  *
- * No lock version, caller must acquire delayed ref lock and allocated memory,
- * then call btrfs_qgroup_trace_extent_post() after exiting lock context.
+ * No lock version, caller must acquire delayed ref lock and allocate memory.
  *
  * Return 0 for success insert
  * Return >0 for existing record, caller can free @record safely.
@@ -194,37 +193,11 @@ int btrfs_qgroup_trace_extent_nolock(
 		struct btrfs_qgroup_extent_record *record);
 
 /*
- * Post handler after qgroup_trace_extent_nolock().
- *
- * NOTE: Current qgroup does the expensive backref walk at transaction
- * committing time with TRANS_STATE_COMMIT_DOING, this blocks incoming
- * new transaction.
- * This is designed to allow btrfs_find_all_roots() to get correct new_roots
- * result.
- *
- * However for old_roots there is no need to do backref walk at that time,
- * since we search commit roots to walk backref and result will always be
- * correct.
- *
- * Due to the nature of no lock version, we can't do backref there.
- * So we must call btrfs_qgroup_trace_extent_post() after exiting
- * spinlock context.
- *
- * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result
- * using current root, then we can move all expensive backref walk out of
- * transaction committing, but not now as qgroup accounting will be wrong again.
- */
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
-				   struct btrfs_qgroup_extent_record *qrecord);
-
-/*
  * Inform qgroup to trace one dirty extent, specified by @bytenr and
  * @num_bytes.
  * So qgroup can account it at commit trans time.
  *
- * Better encapsulated version, with memory allocation and backref walk for
- * commit roots.
- * So this can sleep.
+ * Better encapsulated version.
  *
  * Return 0 if the operation is done.
  * Return <0 for error, like memory allocation failure or invalid parameter