Blob Blame History Raw
From: Jeff Mahoney <jeffm@suse.com>
Subject: xfs: repair malformed inode items during log recovery
References: bsc#1105396
Patch-mainline: Never, SUSE-specific bugfix

The fix for bsc#1024788 introduced a regression in the form of
malformed inode items.  These items are handled fine by kernels
that have the patch for that issue applied, but once we fix it,
we'll need to handle the malformed items instead of complaining
about inode items being too large.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
 fs/xfs/libxfs/xfs_log_format.h |   56 +++++++++++++++++++++++++++++++++
 fs/xfs/xfs_log_priv.h          |    3 +
 fs/xfs/xfs_log_recover.c       |   68 +++++++++++++++++++++++++++++++++--------
 3 files changed, 115 insertions(+), 12 deletions(-)

--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -431,6 +431,62 @@
 	return offsetof(struct xfs_log_dinode, di_next_unlinked);
 }
 
+/* This is to compensate for a regression introduced in 3.12.74-60.64.40 */
+struct xfs_icdinode_malformed {
+	uint16_t	di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
+	uint16_t	di_mode;	/* mode and type of file */
+	int8_t		di_version;	/* inode version */
+	int8_t		di_format;	/* format of di_c data */
+	uint8_t		di_pad3[2];	/* unused in v2/3 inodes */
+	uint32_t	di_uid;		/* owner's user id */
+	uint32_t	di_gid;		/* owner's group id */
+	uint32_t	di_nlink;	/* number of links to file */
+	uint16_t	di_projid_lo;	/* lower part of owner's project id */
+	uint16_t	di_projid_hi;	/* higher part of owner's project id */
+	uint8_t		di_pad[6];	/* unused, zeroed space */
+	uint16_t	di_flushiter;	/* incremented on flush */
+	xfs_ictimestamp_t di_atime;	/* time last accessed */
+	xfs_ictimestamp_t di_mtime;	/* time last modified */
+	xfs_ictimestamp_t di_ctime;	/* time created/inode modified */
+	xfs_fsize_t	di_size;	/* number of bytes in file */
+	xfs_rfsblock_t	di_nblocks;	/* # of direct & btree blocks used */
+	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
+	xfs_extnum_t	di_nextents;	/* number of extents in data fork */
+	xfs_aextnum_t	di_anextents;	/* number of extents in attribute fork*/
+	uint8_t		di_forkoff;	/* attr fork offs, <<3 for 64b align */
+	int8_t		di_aformat;	/* format of attr fork's data */
+	uint32_t	di_dmevmask;	/* DMIG event mask */
+	atomic_t	di_dmstate;	/* DMIG state info */
+	uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
+	uint32_t	di_gen;		/* generation number */
+
+	/* di_next_unlinked is the only non-core field in the old dinode */
+	xfs_agino_t	di_next_unlinked;/* agi unlinked list ptr */
+
+	/* start of the extended dinode, writable fields */
+	uint32_t	di_crc;		/* CRC of the inode */
+	uint64_t	di_changecount;	/* number of attribute changes */
+	xfs_lsn_t	di_lsn;		/* flush sequence */
+	uint64_t	di_flags2;	/* more random flags */
+	uint32_t	di_cowextsize;	/* basic cow extent size for file */
+	uint8_t		di_pad2[12];	/* more padding for future expansion */
+
+	/* fields only written to during inode creation */
+	xfs_ictimestamp_t di_crtime;	/* time created */
+	xfs_ino_t	di_ino;		/* inode number */
+	uuid_t		di_uuid;	/* UUID of the filesystem */
+
+	/* structure must be padded to 64 bit alignment */
+};
+
+static inline uint xfs_icdinode_size_malformed(int version)
+{
+	if (version == 3)
+		return sizeof(struct xfs_icdinode_malformed);
+	return offsetof(struct xfs_icdinode_malformed, di_next_unlinked);
+}
+
+
 /*
  * Buffer Log Format defintions
  *
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -404,6 +404,9 @@
 #endif
 	/* log recovery lsn tracking (for buffer submission */
 	xfs_lsn_t		l_recovery_lsn;
+#ifndef __GENKSYMS__
+	int			l_malformed_inode_warning;
+#endif
 };
 
 #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2906,6 +2906,46 @@
 	return error;
 }
 
+/*
+ * SUSE kernels 3.12.74-60.64.40 through 3.12.74-60.64.99 had a regression
+ * where we would write out malformed inode items that would in turn
+ * corrupt inodes.  Handle them gracefully here rather than forcing the
+ * user to zero out their log.
+ */
+STATIC void
+repair_malformed_inode_item(struct xlog *log, xfs_log_iovec_t *vec)
+{
+	struct xfs_icdinode_malformed *mal = vec->i_addr;
+	struct xfs_log_dinode *ldip = vec->i_addr;
+	struct xfs_log_dinode fixed;
+
+	if (!log->l_malformed_inode_warning) {
+		xfs_info(log->l_mp,
+			 "detected malformed inodes in log, repairing");
+		log->l_malformed_inode_warning = 1;
+	}
+
+	fixed.di_dmstate	= atomic_read(&mal->di_dmstate);
+	fixed.di_flags		= mal->di_flags;
+	fixed.di_gen		= mal->di_gen;
+	fixed.di_next_unlinked	= mal->di_next_unlinked;
+
+	if (ldip->di_version >= 3) {
+		fixed.di_crc		= mal->di_crc;
+		fixed.di_changecount	= mal->di_changecount;
+		fixed.di_lsn		= mal->di_lsn;
+		fixed.di_flags2		= mal->di_flags2;
+		fixed.di_crtime.t_sec	= mal->di_crtime.t_sec;
+		fixed.di_crtime.t_nsec	= mal->di_crtime.t_nsec;
+		fixed.di_ino		= mal->di_ino;
+		memcpy(&fixed.di_pad2, mal->di_pad2, sizeof(fixed.di_pad2));
+		uuid_copy(&fixed.di_uuid, &mal->di_uuid);
+	}
+
+	memcpy(&ldip->di_dmstate, &fixed.di_dmstate,
+	       vec->i_len - offsetof(struct xfs_log_dinode, di_dmstate));
+}
+
 STATIC int
 xlog_recover_inode_pass2(
 	struct xlog			*log,
@@ -2924,7 +2964,7 @@
 	int			attr_index;
 	uint			fields;
 	struct xfs_log_dinode	*ldip;
-	uint			isize;
+	uint			isize, malformed_isize;
 	int			need_free = 0;
 
 	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
@@ -2987,6 +3027,21 @@
 		goto out_release;
 	}
 
+	isize = xfs_log_dinode_size(ldip->di_version);
+	malformed_isize = xfs_icdinode_size_malformed(ldip->di_version);
+	if (unlikely(item->ri_buf[1].i_len == malformed_isize)) {
+		repair_malformed_inode_item(log, &item->ri_buf[1]);
+	} else if (unlikely(item->ri_buf[1].i_len > isize)) {
+		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
+				     XFS_ERRLEVEL_LOW, mp, ldip,
+				     sizeof(*ldip));
+		xfs_alert(mp,
+			"%s: Bad inode log record length %d, rec ptr "PTR_FMT,
+			__func__, item->ri_buf[1].i_len, item);
+		error = -EFSCORRUPTED;
+		goto out_release;
+	}
+
 	/*
 	 * If the inode has an LSN in it, recover the inode only if it's less
 	 * than the lsn of the transaction we are replaying. Note: we still
@@ -3083,17 +3138,6 @@
 		error = -EFSCORRUPTED;
 		goto out_release;
 	}
-	isize = xfs_log_dinode_size(ldip->di_version);
-	if (unlikely(item->ri_buf[1].i_len > isize)) {
-		XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
-				     XFS_ERRLEVEL_LOW, mp, ldip,
-				     sizeof(*ldip));
-		xfs_alert(mp,
-			"%s: Bad inode log record length %d, rec ptr "PTR_FMT,
-			__func__, item->ri_buf[1].i_len, item);
-		error = -EFSCORRUPTED;
-		goto out_release;
-	}
 
 	/* recover the log dinode inode into the on disk inode */
 	xfs_log_dinode_to_disk(ldip, dip);