Blob Blame History Raw
From: Jeff Mahoney <jeffm@suse.com>
Subject: btrfs: suspend qgroups during relocation recovery
References: bsc#1086467
Patch-mainline: Never, this is a workaround

Commit 824d8dff884 (btrfs: qgroup: Fix qgroup data leaking by using
subtree tracing) does the right, albeit slow, thing during relocation to
ensure qgroup correctness.  If a system crashes while relocation is
underway and there are many qgroups, the qgroup accounting for the
entire file system can end up in a single transaction that may take
an unreasonable time to run -- especially as relocation is done at
mount time in the mount process context.  The complexity of the backref
resolution may also force smaller systems out of memory.

This patch suspends qgroups while relocation recovery occurs and
resumes them when it completes, setting up a rescan as is done for any
other qgroup inconsistency.  The rescan worker, which is queued later in
the mount process, splits up the work into smaller transactions which
commit in the background.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
 fs/btrfs/qgroup.c     |   79 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/qgroup.h     |    6 +++
 fs/btrfs/relocation.c |   16 ++++++++++
 3 files changed, 101 insertions(+)

--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3270,6 +3270,85 @@ btrfs_qgroup_rescan(struct btrfs_fs_info
 	return 0;
 }
 
+/*
+ * Suspend runtime quota activity.
+ *
+ * Disabling quotas normally cleans up the quota tree, which removes
+ * relations that define nested qgroups as well as the stale usage
+ * numbers.  Suspending quotas leaves the disk alone other than
+ * marking quotas inconsistent.  If the system crashes, a rescan
+ * will be needed just as if quotas were inconsistent for any other
+ * reason.
+ *
+ * This is intended only to be used by the relocation recovery code to
+ * work around qgroups attempting to do the accounting for many entire
+ * trees in a single transaction.
+ *
+ * The rescan worker cannot be running.
+ *
+ * Returns:
+ * < 0 on error
+ * 0 when quotas were not enabled
+ * 1 when quotas were suspended
+ */
+int btrfs_quota_suspend(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_trans_handle *trans;
+	int ret = 0;
+	int err = 0;
+
+	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
+		return 0;
+
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	/*
+	 * Cancel any pending rescan since we'll need to start over when
+	 * we enable again.
+	 */
+	fs_info->qgroup_rescan_ready = false;
+
+	/* This shouldn't happen */
+	if (WARN_ON(fs_info->qgroup_rescan_running))
+		ret = -EBUSY;
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+	if (ret)
+		return ret;
+
+	trans = btrfs_start_transaction(fs_info->quota_root, 1);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans); /* Quotas are disabled */
+
+	spin_lock(&fs_info->qgroup_lock);
+	fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+	spin_unlock(&fs_info->qgroup_lock);
+
+	set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags);
+	err = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
+	ret = btrfs_commit_transaction(trans);
+	if (ret || err)
+		return ret ?: err;
+
+	return 1;
+}
+
+/*
+ * Resume quotas after suspending.
+ *
+ * Like the above, this is only intended for use by the relocation recovery
+ * code.  Quotas will have already been set up but we still haven't loaded
+ * the fs trees.  We set up rescan but don't queue it.  It will be queued
+ * as normal further in the mount/remount process.
+ */
+void btrfs_quota_resume(struct btrfs_fs_info *fs_info)
+{
+	/*
+	 * BTRFS_FS_QUOTA_ENABLING kicks off the rescan thread.
+	 * Relocation recovery is way too early to do that.
+	 */
+	set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+	WARN_ON(qgroup_rescan_init(fs_info, 0, 1));
+}
+
 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
 				     bool interruptible)
 {
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -413,5 +413,11 @@ int btrfs_qgroup_add_swapped_blocks(stru
 		u64 last_snapshot);
 int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
 		struct btrfs_root *root, struct extent_buffer *eb);
+/*
+ * These are only used as a workaround for relocation recovery since it
+ * degrades into accounting the entire tree at once.
+ */
+int btrfs_quota_suspend(struct btrfs_fs_info *fs_info);
+void btrfs_quota_resume(struct btrfs_fs_info *fs_info);
 
 #endif /* __BTRFS_QGROUP__ */
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4907,6 +4907,7 @@ int btrfs_recover_relocation(struct btrf
 	struct extent_buffer *leaf;
 	struct reloc_control *rc = NULL;
 	struct btrfs_trans_handle *trans;
+	bool resume_qgroups = false;
 	int ret;
 	int err = 0;
 
@@ -4974,6 +4975,15 @@ int btrfs_recover_relocation(struct btrf
 	if (list_empty(&reloc_roots))
 		goto out;
 
+	err = btrfs_quota_suspend(fs_info);
+	if (err < 0)
+		goto out;
+	if (err > 0) {
+		btrfs_info(fs_info,
+			   "suspended qgroups for relocation recovery");
+		resume_qgroups = true;
+	}
+
 	rc = alloc_reloc_control(fs_info);
 	if (!rc) {
 		err = -ENOMEM;
@@ -5033,6 +5043,12 @@ int btrfs_recover_relocation(struct btrf
 		err = PTR_ERR(trans);
 		goto out_clean;
 	}
+
+	if (resume_qgroups) {
+		btrfs_info(fs_info,
+			   "resuming qgroups after relocation recovery");
+		btrfs_quota_resume(fs_info);
+	}
 	err = btrfs_commit_transaction(trans);
 
 out_clean: