From 75ce0a496c6cbc49fa8e2315333cf1ba5bc5bd6e Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Wed, 10 Oct 2018 14:16:42 +0300
Subject: [PATCH 5/6] btrfs: Handle one more split-brain scenario during fsid
change
References: fate#325871
Patch-mainline: Submitted, awaiting review
This commit continues hardening the scanning code to handle cases where
power loss could have caused disks in a multi-disk filesystem to be
in inconsistent state. Namely handle the situation that can occur when
some of the disks in multi-disk fs have completed their fsid change i.e
they have METADATA_UUID incompat flag set, have cleared the
CHANGING_FSID_V2 flag and their fsid/metadata_uuid are different. At
the same time the other half of the disks will have their
fsid/metadata_uuid unchanged and will only have CHANGING_FSID_V2 flag.
This is handled by introducing code in the scan path which:
a) Handles the case when a device with CHANGING_FSID_V2 flag is
scanned and as a result btrfs_fs_devices is created with matching
fsid/metdata_uuid. Subsequently, when a device with completed fsid
change is scanned it will detect this via the new code in find_fsid
i.e that such an fs_devices exist that fsid_change flag is set to true,
it's metadata_uuid/fsid match and the metadata_uuid of the scanned
device matches that of the fs_devices. In this case, it's important to
note that the devices which has its fsid change completed will have a
higher generation number than the device with FSID_CHANGING_V2 flag
set, so its superblock block will be used during mount. To prevent an
assertion triggering because the sb used for mounting will have
differing fsid/metadata_uuid than the ones in the fs_devices struct
also add code in device_list_add which overwrites the values in
fs_devices.
b) Alternatively we can end up with a device that completed its
fsid change be scanned first which will create the respective
btrfs_fs_devices struct with differing fsid/metadata_uuid. In this
case when a device with FSID_CHANGING_V2 flag set is scanned it will
call the newly added find_fsid_inprogress function which will return
the correct fs_devices.
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
---
fs/btrfs/volumes.c | 77 +++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 73 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0c6fdb069f47..68f6bae04d12 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -285,6 +285,26 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid,
ASSERT(fsid);
+ if (metadata_uuid) {
+
+ /*
+ * Handle scanned device having completed its fsid change but
+ * belonging to a fs_devices that was created by first scanning
+ * a device which didn't have it's fsid/metadata_uuid changed
+ * at all and the CHANGING_FSID_V2 flag set.
+ */
+ list_for_each_entry(fs_devices, &fs_uuids, list) {
+ if (fs_devices->fsid_change &&
+ memcmp(metadata_uuid, fs_devices->fsid,
+ BTRFS_FSID_SIZE) == 0 &&
+ memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
+ BTRFS_FSID_SIZE) == 0) {
+ return fs_devices;
+ }
+ }
+ }
+
+ /* Handle non-split brain cases */
list_for_each_entry(fs_devices, &fs_uuids, list) {
if (metadata_uuid) {
if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0
@@ -615,6 +635,27 @@ void btrfs_free_stale_device(struct btrfs_device *cur_dev)
}
}
+/*
+ * Handle scanned device having its CHANGING_FSID_V2 flag set and the fs_devices
+ * being created with a disk that has already completed its fsid change.
+ */
+static struct btrfs_fs_devices *find_fsid_inprogress(
+ struct btrfs_super_block *disk_super)
+{
+ struct btrfs_fs_devices *fs_devices;
+
+ list_for_each_entry(fs_devices, &fs_uuids, list) {
+ if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
+ BTRFS_FSID_SIZE) != 0 &&
+ memcmp(fs_devices->metadata_uuid, disk_super->fsid,
+ BTRFS_FSID_SIZE) == 0 && !fs_devices->fsid_change) {
+ return fs_devices;
+ }
+ }
+
+ return NULL;
+}
+
/*
* Add new device to list of registered devices
*
@@ -628,7 +669,7 @@ static noinline int device_list_add(const char *path,
u64 devid, struct btrfs_fs_devices **fs_devices_ret)
{
struct btrfs_device *device;
- struct btrfs_fs_devices *fs_devices;
+ struct btrfs_fs_devices *fs_devices = NULL;
struct rcu_string *name;
int ret = 0;
u64 found_transid = btrfs_super_generation(disk_super);
@@ -637,10 +678,23 @@ static noinline int device_list_add(const char *path,
bool fsid_change_in_progress = (btrfs_super_flags(disk_super) &
BTRFS_SUPER_FLAG_CHANGING_FSID_V2);
- if (has_metadata_uuid)
- fs_devices = find_fsid(disk_super->fsid, disk_super->metadata_uuid);
- else
+ if (fsid_change_in_progress && !has_metadata_uuid) {
+ /*
+ * When we have an image which has CHANGING_FSID_V2 set it might
+ * belong to either a filesystem which has disks with completed
+ * fsid change or it might belong to fs with no uuid changes in
+ * effect, handle both.
+ */
+ fs_devices = find_fsid_inprogress(disk_super);
+ if (!fs_devices)
+ fs_devices = find_fsid(disk_super->fsid, NULL);
+
+ } else if (has_metadata_uuid) {
+ fs_devices = find_fsid(disk_super->fsid,
+ disk_super->metadata_uuid);
+ } else {
fs_devices = find_fsid(disk_super->fsid, NULL);
+ }
if (!fs_devices) {
if (has_metadata_uuid)
@@ -660,6 +714,21 @@ static noinline int device_list_add(const char *path,
} else {
device = __find_device(&fs_devices->devices, devid,
disk_super->dev_item.uuid);
+
+ /*
+ * If this disk has been pulled into an fs devices created by
+ * a device which had the FSID_CHANGING flag then replace the
+ * metadata_uuid/fsid values of the fs_devices.
+ */
+ if (has_metadata_uuid && fs_devices->fsid_change &&
+ found_transid > fs_devices->latest_generation) {
+ memcpy(fs_devices->fsid, disk_super->fsid,
+ BTRFS_FSID_SIZE);
+ memcpy(fs_devices->metadata_uuid,
+ disk_super->metadata_uuid, BTRFS_FSID_SIZE);
+
+ fs_devices->fsid_change = false;
+ }
}
if (!device) {
--
2.17.1