Blob Blame History Raw
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 6 Feb 2018 20:40:40 +0000
Subject: Btrfs: send, do not issue unnecessary truncate operations
Git-commit: ffa7c4296e93b25fc302c209ae488e57aad4c973
Patch-mainline: v4.17-rc1
References: bsc#1182173

When send finishes processing an inode representing a regular file, it
always issues a truncate operation for that file, even if its size did
not change or the last write sets the file size correctly. In the most
common cases, the issued write operations set the file to correct size
(either full or incremental sends) or the file size did not change (for
incremental sends), so the only case where a truncate operation is needed
is when a file size becomes smaller in the send snapshot when compared
to the parent snapshot.

By not issuing unnecessary truncate operations we reduce the stream size
and save time in the receiver. Currently truncating a file to the same
size triggers writeback of its last page (if it's dirty) and waits for it
to complete (only if the file size is not aligned with the filesystem's
sector size). This is being fixed by another patch and is independent of
this change (that patch's title is "Btrfs: skip writeback of last page
when truncating file to same size").

The following script was used to measure time spent by a receiver without
this change applied, with this change applied, and without this change and
with the truncate fix applied (the fix to not make it start and wait for
writeback to complete).

  $ cat test_send.sh
  #!/bin/bash

  SRC_DEV=/dev/sdc
  DST_DEV=/dev/sdd
  SRC_MNT=/mnt/sdc
  DST_MNT=/mnt/sdd

  mkfs.btrfs -f $SRC_DEV >/dev/null
  mkfs.btrfs -f $DST_DEV >/dev/null
  mount $SRC_DEV $SRC_MNT
  mount $DST_DEV $DST_MNT

  echo "Creating source filesystem"
  for ((t = 0; t < 10; t++)); do
      (
          for ((i = 1; i <= 20000; i++)); do
              xfs_io -f -c "pwrite -S 0xab 0 5000" \
                  $SRC_MNT/file_$i > /dev/null
          done
      ) &
     worker_pids[$t]=$!
  done
  wait ${worker_pids[@]}

  echo "Creating and sending snapshot"
  btrfs subvolume snapshot -r $SRC_MNT $SRC_MNT/snap1 >/dev/null
  /usr/bin/time -f "send took %e seconds"    \
         btrfs send -f $SRC_MNT/send_file $SRC_MNT/snap1
  /usr/bin/time -f "receive took %e seconds" \
         btrfs receive -f $SRC_MNT/send_file $DST_MNT

  umount $SRC_MNT
  umount $DST_MNT

The results, which are averages for 5 runs for each case, were the
following:

* Without this change

average receive time was 26.49 seconds
standard deviation of 2.53 seconds

* Without this change and with the truncate fix

average receive time was 12.51 seconds
standard deviation of 0.32 seconds

* With this change and without the truncate fix

average receive time was 10.02 seconds
standard deviation of 1.11 seconds

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Acked-by: Nikolay Borisov <nborisov@suse.com>
---
 fs/btrfs/send.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 085542832b9a..6615d849f0f0 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -112,6 +112,7 @@ struct send_ctx {
 	u64 cur_inode_mode;
 	u64 cur_inode_rdev;
 	u64 cur_inode_last_extent;
+	u64 cur_inode_next_write_offset;

 	u64 send_progress;

@@ -5030,6 +5031,7 @@ static int send_hole(struct send_ctx *sctx, u64 end)
 			break;
 		offset += len;
 	}
+	sctx->cur_inode_next_write_offset = offset;
 tlv_put_failure:
 	fs_path_free(p);
 	return ret;
@@ -5265,6 +5267,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
 	} else {
 		ret = send_extent_data(sctx, offset, len);
 	}
+	sctx->cur_inode_next_write_offset = offset + len;
 out:
 	return ret;
 }
@@ -5789,6 +5792,7 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
 	u64 right_gid;
 	int need_chmod = 0;
 	int need_chown = 0;
+	int need_truncate = 1;
 	int pending_move = 0;
 	int refs_processed = 0;

@@ -5826,9 +5830,13 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
 		need_chown = 1;
 		if (!S_ISLNK(sctx->cur_inode_mode))
 			need_chmod = 1;
+		if (sctx->cur_inode_next_write_offset == sctx->cur_inode_size)
+			need_truncate = 0;
 	} else {
+		u64 old_size;
+
 		ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
-				NULL, NULL, &right_mode, &right_uid,
+				&old_size, NULL, &right_mode, &right_uid,
 				&right_gid, NULL);
 		if (ret < 0)
 			goto out;
@@ -5837,6 +5845,10 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
 			need_chown = 1;
 		if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
 			need_chmod = 1;
+		if ((old_size == sctx->cur_inode_size) ||
+		    (sctx->cur_inode_size > old_size &&
+		     sctx->cur_inode_next_write_offset == sctx->cur_inode_size))
+			need_truncate = 0;
 	}

 	if (S_ISREG(sctx->cur_inode_mode)) {
@@ -5855,10 +5867,13 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
 					goto out;
 			}
 		}
-		ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
-				sctx->cur_inode_size);
-		if (ret < 0)
-			goto out;
+		if (need_truncate) {
+			ret = send_truncate(sctx, sctx->cur_ino,
+					    sctx->cur_inode_gen,
+					    sctx->cur_inode_size);
+			if (ret < 0)
+				goto out;
+		}
 	}

 	if (need_chown) {
@@ -5912,6 +5927,7 @@ static int changed_inode(struct send_ctx *sctx,
 	sctx->cur_ino = key->objectid;
 	sctx->cur_inode_new_gen = 0;
 	sctx->cur_inode_last_extent = (u64)-1;
+	sctx->cur_inode_next_write_offset = 0;

 	/*
 	 * Set send_progress to current inode. This will tell all get_cur_xxx