Blob Blame History Raw
From: Vlastimil Babka <vbabka@suse.cz>
Subject: kabi: fix struct zone kabi after adding unaccepted_pages and NR_UNACCEPTED
Patch-mainline: Never, KABI
References: jsc#PED-7167 bsc#1218643 bsc#1221338 bsc#1220114

To add unaccepted_pages, we can move it to the hole before pad1. Keep exposing
it to kabi checker outside of x86_64 so the hole assumption is forced to be
rechecked in case e.g. arm64 tries to enable it later.

Adding NR_UNACCEPTED to enum zone_stat_item breaks KABI because it increments
NR_VM_ZONE_STAT_ITEMS, which enlarges zone.vm_stat[] and thus shifts
zone.vm_numa_event[]. That layout is visible to everyone, as most helpers are
static inline etc.

Solve the KABI issue by creating zone_stat_item_2 for NR_UNACCEPTED, adding
vm_stat_2 to the end of struct zone and duplicating just enough helpers to work
with NR_UNACCEPTED. Also vm_zone_stat_2 is added for the global counters.

Since this is not a hot counter and updates are done under zone->lock anyway,
we can skip the pcp vmstat diffs. Also ignore !CONFIG_SMP and !CONFIG_NUMA
helper variants.

Caveat: any future patch that uses NR_UNACCEPTED with the normal helpers will
still compile, but will modify NR_FREE_PAGES instead, because both have the
same value and the typing of enums is not strong enough to catch the mix-up.

In vmstat_text[] we add the new field as the very last one and adjust printing
vmstat, node vmstat, zoneinfo, to include the field.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>

---
 drivers/base/node.c    |    7 ++++++-
 fs/proc/meminfo.c      |    2 +-
 include/linux/mmzone.h |   30 ++++++++++++++++++++++++------
 include/linux/vmstat.h |   42 ++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c        |    6 +++---
 mm/vmstat.c            |   46 ++++++++++++++++++++++++++++++++++++++++++----
 6 files changed, 118 insertions(+), 15 deletions(-)

--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -480,7 +480,7 @@ static ssize_t node_read_meminfo(struct
 #endif
 #ifdef CONFIG_UNACCEPTED_MEMORY
 			     ,
-			     nid, K(sum_zone_node_page_state(nid, NR_UNACCEPTED))
+			     nid, K(sum_zone_node_page_state_2(nid, NR_UNACCEPTED))
 #endif
 			    );
 	len += hugetlb_report_node_meminfo(buf, len, nid);
@@ -540,6 +540,11 @@ static ssize_t node_read_vmstat(struct d
 				     pages);
 	}
 
+	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS_2; i++)
+		len += sysfs_emit_at(buf, len, "%s %lu\n",
+				     zone_stat_name_2(i),
+				     sum_zone_node_page_state_2(nid, i));
+
 	return len;
 }
 static DEVICE_ATTR(vmstat, 0444, node_read_vmstat, NULL);
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -148,7 +148,7 @@ static int meminfo_proc_show(struct seq_
 
 #ifdef CONFIG_UNACCEPTED_MEMORY
 	show_val_kb(m, "Unaccepted:     ",
-		    global_zone_page_state(NR_UNACCEPTED));
+		    global_zone_page_state_2(NR_UNACCEPTED));
 #endif
 
 	hugetlb_report_meminfo(m);
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -160,10 +160,14 @@ enum zone_stat_item {
 	NR_ZSPAGES,		/* allocated in zsmalloc */
 #endif
 	NR_FREE_CMA_PAGES,
+	NR_VM_ZONE_STAT_ITEMS };
+
+enum zone_stat_item_2 {
 #ifdef CONFIG_UNACCEPTED_MEMORY
 	NR_UNACCEPTED,
 #endif
-	NR_VM_ZONE_STAT_ITEMS };
+	NR_VM_ZONE_STAT_ITEMS_2
+};
 
 enum node_stat_item {
 	NR_LRU_BASE,
@@ -612,17 +616,24 @@ struct zone {
 
 	int initialized;
 
+/*
+ * There is a hole on x86_64 thanks to _pad1_, but other architectures have
+ * not been checked, so restrict this to CONFIG_X86_64. In case we later enable
+ * this on e.g. arm64, kabi check will fail and we'll need to re-evaluate.
+ */
+#if !defined(__GENKSYMS__) && defined(CONFIG_X86_64)
+#ifdef CONFIG_UNACCEPTED_MEMORY
+	/* Pages to be accepted. All pages on the list are MAX_ORDER */
+	struct list_head	unaccepted_pages;
+#endif
+#endif
+
 	/* Write-intensive fields used from the page allocator */
 	ZONE_PADDING(_pad1_)
 
 	/* free areas of different sizes */
 	struct free_area	free_area[MAX_ORDER];
 
-#ifdef CONFIG_UNACCEPTED_MEMORY
-	/* Pages to be accepted. All pages on the list are MAX_ORDER */
-	struct list_head	unaccepted_pages;
-#endif
-
 	/* zone flags, see below */
 	unsigned long		flags;
 
@@ -671,7 +682,14 @@ struct zone {
 	/* Zone statistics */
 	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
 	atomic_long_t		vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
+#ifndef __GENKSYMS__
+	union {
+		atomic_long_t		vm_stat_2[NR_VM_ZONE_STAT_ITEMS_2];
+		void *suse_kabi_padding;
+	};
+#else
 	void *suse_kabi_padding;
+#endif
 } ____cacheline_internodealigned_in_smp;
 
 enum pgdat_flags {
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -138,6 +138,7 @@ static inline void vm_events_fold_cpu(in
  * Zone and node-based page accounting with per cpu differentials.
  */
 extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS];
+extern atomic_long_t vm_zone_stat_2[NR_VM_ZONE_STAT_ITEMS_2];
 extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS];
 extern atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
 
@@ -169,6 +170,13 @@ static inline void zone_page_state_add(l
 	atomic_long_add(x, &vm_zone_stat[item]);
 }
 
+static inline void zone_page_state_add_2(long x, struct zone *zone,
+				 enum zone_stat_item_2 item)
+{
+	atomic_long_add(x, &zone->vm_stat_2[item]);
+	atomic_long_add(x, &vm_zone_stat_2[item]);
+}
+
 static inline void node_page_state_add(long x, struct pglist_data *pgdat,
 				 enum node_stat_item item)
 {
@@ -186,6 +194,16 @@ static inline unsigned long global_zone_
 	return x;
 }
 
+static inline unsigned long global_zone_page_state_2(enum zone_stat_item_2 item)
+{
+	long x = atomic_long_read(&vm_zone_stat_2[item]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
 static inline
 unsigned long global_node_page_state_pages(enum node_stat_item item)
 {
@@ -215,6 +233,17 @@ static inline unsigned long zone_page_st
 	return x;
 }
 
+static inline unsigned long zone_page_state_2(struct zone *zone,
+					enum zone_stat_item_2 item)
+{
+	long x = atomic_long_read(&zone->vm_stat_2[item]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
 /*
  * More accurate version that also considers the currently pending
  * deltas. For that we need to loop over all cpus to find the current
@@ -257,6 +286,8 @@ __count_numa_events(struct zone *zone, e
 
 extern unsigned long sum_zone_node_page_state(int node,
 					      enum zone_stat_item item);
+extern unsigned long sum_zone_node_page_state_2(int node,
+					      enum zone_stat_item_2 item);
 extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item);
 extern unsigned long node_page_state(struct pglist_data *pgdat,
 						enum node_stat_item item);
@@ -274,6 +305,7 @@ static inline void fold_vm_numa_events(v
 
 #ifdef CONFIG_SMP
 void __mod_zone_page_state(struct zone *, enum zone_stat_item item, long);
+void __mod_zone_page_state_2(struct zone *, enum zone_stat_item_2 item, long);
 void __inc_zone_page_state(struct page *, enum zone_stat_item);
 void __dec_zone_page_state(struct page *, enum zone_stat_item);
 
@@ -469,6 +501,16 @@ static inline const char *vm_event_name(
 }
 #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
 
+static inline const char *zone_stat_name_2(enum zone_stat_item_2 item)
+{
+	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
+			   NR_VM_NUMA_EVENT_ITEMS +
+			   NR_VM_NODE_STAT_ITEMS +
+			   NR_VM_WRITEBACK_STAT_ITEMS +
+			   NR_VM_EVENT_ITEMS +
+			   item];
+}
+
 #ifdef CONFIG_MEMCG
 
 void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3835,7 +3835,7 @@ static inline long __zone_watermark_unus
 		unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
 #endif
 #ifdef CONFIG_UNACCEPTED_MEMORY
-	unusable_free += zone_page_state(z, NR_UNACCEPTED);
+	unusable_free += zone_page_state_2(z, NR_UNACCEPTED);
 #endif
 
 	return unusable_free;
@@ -9709,7 +9709,7 @@ static bool try_to_accept_memory_one(str
 	last = list_empty(&zone->unaccepted_pages);
 
 	__mod_zone_freepage_state(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
-	__mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
+	__mod_zone_page_state_2(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
 	spin_unlock_irqrestore(&zone->lock, flags);
 
 	accept_page(page, MAX_ORDER - 1);
@@ -9761,7 +9761,7 @@ static bool __free_unaccepted(struct pag
 	first = list_empty(&zone->unaccepted_pages);
 	list_add_tail(&page->lru, &zone->unaccepted_pages);
 	__mod_zone_freepage_state(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
-	__mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
+	__mod_zone_page_state_2(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
 	spin_unlock_irqrestore(&zone->lock, flags);
 
 	if (first)
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -161,6 +161,7 @@ void vm_events_fold_cpu(int cpu)
  * vm_stat contains the global counters
  */
 atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
+atomic_long_t vm_zone_stat_2[NR_VM_ZONE_STAT_ITEMS_2] __cacheline_aligned_in_smp;
 atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
 atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp;
 EXPORT_SYMBOL(vm_zone_stat);
@@ -373,6 +374,18 @@ void __mod_zone_page_state(struct zone *
 }
 EXPORT_SYMBOL(__mod_zone_page_state);
 
+void __mod_zone_page_state_2(struct zone *zone, enum zone_stat_item_2 item,
+			   long delta)
+{
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+
+	zone_page_state_add_2(delta, zone, item);
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
+}
+
 void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
 				long delta)
 {
@@ -1005,6 +1018,19 @@ unsigned long sum_zone_node_page_state(i
 	return count;
 }
 
+unsigned long sum_zone_node_page_state_2(int node,
+				 enum zone_stat_item_2 item)
+{
+	struct zone *zones = NODE_DATA(node)->node_zones;
+	int i;
+	unsigned long count = 0;
+
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		count += zone_page_state_2(zones + i, item);
+
+	return count;
+}
+
 /* Determine the per node value of a numa stat item. */
 unsigned long sum_zone_numa_event_state(int node,
 				 enum numa_stat_item item)
@@ -1181,9 +1207,6 @@ const char * const vmstat_text[] = {
 	"nr_zspages",
 #endif
 	"nr_free_cma",
-#ifdef CONFIG_UNACCEPTED_MEMORY
-	"nr_unaccepted",
-#endif
 
 	/* enum numa_stat_item counters */
 #ifdef CONFIG_NUMA
@@ -1390,6 +1413,11 @@ const char * const vmstat_text[] = {
 	"direct_map_level3_splits",
 #endif
 #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
+
+	/* enum zone_stat_item_2 counters */
+#ifdef CONFIG_UNACCEPTED_MEMORY
+	"nr_unaccepted",
+#endif
 };
 #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
 
@@ -1699,6 +1727,10 @@ static void zoneinfo_show_print(struct s
 		seq_printf(m, "\n      %-12s %lu", zone_stat_name(i),
 			   zone_page_state(zone, i));
 
+	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS_2; i++)
+		seq_printf(m, "\n      %-12s %lu", zone_stat_name_2(i),
+			   zone_page_state_2(zone, i));
+
 #ifdef CONFIG_NUMA
 	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
 		seq_printf(m, "\n      %-12s %lu", numa_stat_name(i),
@@ -1760,7 +1792,8 @@ static const struct seq_operations zonei
 			 NR_VM_NODE_STAT_ITEMS + \
 			 NR_VM_WRITEBACK_STAT_ITEMS + \
 			 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
-			  NR_VM_EVENT_ITEMS : 0))
+			  NR_VM_EVENT_ITEMS : 0) + \
+			  NR_VM_ZONE_STAT_ITEMS_2)
 
 static void *vmstat_start(struct seq_file *m, loff_t *pos)
 {
@@ -1802,6 +1835,11 @@ static void *vmstat_start(struct seq_fil
 	v[PGPGIN] /= 2;		/* sectors -> kbytes */
 	v[PGPGOUT] /= 2;
 #endif
+	v += NR_VM_EVENT_ITEMS;
+
+	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS_2; i++)
+		v[i] = global_zone_page_state_2(i);
+
 	return (unsigned long *)m->private + *pos;
 }