From: Vlastimil Babka <vbabka@suse.cz>
Subject: kabi: fix struct zone kabi after adding unaccepted_pages and NR_UNACCEPTED
Patch-mainline: Never, KABI
References: jsc#PED-7167 bsc#1218643

To add unaccepted_pages, we can move it into the hole before _pad1_. Keep
exposing it to the kabi checker outside of x86_64 so that the hole assumption
is forced to be rechecked in case e.g. arm64 tries to enable it later.
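
A rough sketch of the resulting layout (illustrative only, not pahole output;
the assumption is that the padding forced by ZONE_PADDING(_pad1_) on x86_64
leaves at least the 16 bytes needed for a struct list_head):

  struct zone {
  	...
  	int			initialized;
  	/* former hole: big enough for two pointers on x86_64 */
  	struct list_head	unaccepted_pages;
  	ZONE_PADDING(_pad1_)	/* re-aligns to a cacheline boundary, so the
  				 * offsets of all later members are unchanged */
  	...
  };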

Adding NR_UNACCEPTED to enum zone_stat_item breaks KABI because it increments
NR_VM_ZONE_STAT_ITEMS and enlarges zone.vm_stat[], which shifts
zone.vm_numa_event[]. That layout is visible to everyone, and most of the
accessor helpers are static inline, so the change cannot be contained.
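
A minimal sketch of the mechanics (simplified declarations, not the real
ones):

  enum zone_stat_item { NR_FREE_PAGES, /* ... */ NR_VM_ZONE_STAT_ITEMS };

  struct zone {
  	/* ... */
  	atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];	      /* grows by one */
  	atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS]; /* offset shifts */
  };

A module built against the old layout with an inlined accessor touching
vm_numa_event[] would silently read or write the wrong slot.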

Solve the KABI issue by creating enum zone_stat_item_2 for NR_UNACCEPTED,
adding vm_stat_2[] to the end of struct zone, and duplicating just enough
helpers to work with NR_UNACCEPTED. A vm_zone_stat_2[] array is also added for
the global counters.
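
Note that hiding vm_stat_2[] in a union with the existing suse_kabi_padding
only works size-wise while the new enum has at most one member:
sizeof(atomic_long_t[1]) == sizeof(void *) on x86_64, so the union occupies
exactly the 8 bytes the padding already reserved:

  union {
  	atomic_long_t	vm_stat_2[NR_VM_ZONE_STAT_ITEMS_2]; /* 0 or 1 entry */
  	void		*suse_kabi_padding;		    /* 8 bytes */
  };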

Since this is not a hot counter and updates are done under zone->lock anyway,
we can skip the pcp vmstat diffs. The !CONFIG_SMP and !CONFIG_NUMA helper
variants are also left out.
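
The expected usage pattern, as in the mm/page_alloc.c hunks below, is a plain
atomic update under the already-held lock:

  spin_lock_irqsave(&zone->lock, flags);
  ...
  __mod_zone_page_state_2(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
  spin_unlock_irqrestore(&zone->lock, flags);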

Caveat: any future patch that uses NR_UNACCEPTED with the normal helpers will
still compile, but will silently modify NR_FREE_PAGES instead, because the two
enumerators share the value 0 and C enum typing is not strong enough to catch
the mismatch.
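
For illustration, both of these compile; only the second touches the new
counter:

  /* Compiles: NR_UNACCEPTED and NR_FREE_PAGES are both plain 0 to the
   * compiler, so this bumps the free pages counter instead. */
  __mod_zone_page_state(zone, NR_UNACCEPTED, nr);

  /* Correct: the _2 helper indexes vm_stat_2[]. */
  __mod_zone_page_state_2(zone, NR_UNACCEPTED, nr);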

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
[mkoutny: Strictly speaking, vmstat_text[] is as much a part of KABI as enum
 zone_stat_item. Instead of adding a secondary vmstat_text[] translation
 table, carefully account for the added member in the idx->name helpers. The
 correction reduces to 0 when !CONFIG_UNACCEPTED_MEMORY.
 Helpers inlined in 3rd party code remain broken (but assume such code won't
 depend on/expose the string representations).]
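
Worked out, the idx->name correction is (assuming vmstat_text[] keeps the
"nr_unaccepted" string right after the zone stat names, as in the base
kernel):

  /* CONFIG_UNACCEPTED_MEMORY=y: NR_VM_ZONE_STAT_ITEMS_2 == 1 */
  numa_stat_name(item) -> vmstat_text[NR_VM_ZONE_STAT_ITEMS + 1 + item]
  /* CONFIG_UNACCEPTED_MEMORY=n: NR_VM_ZONE_STAT_ITEMS_2 == 0 */
  numa_stat_name(item) -> vmstat_text[NR_VM_ZONE_STAT_ITEMS + 0 + item]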

---
 drivers/base/node.c    |    2 +-
 fs/proc/meminfo.c      |    2 +-
 include/linux/mmzone.h |   30 ++++++++++++++++++++++++------
 include/linux/vmstat.h |   40 ++++++++++++++++++++++++++++++++++++----
 mm/page_alloc.c        |    6 +++---
 mm/vmstat.c            |   26 ++++++++++++++++++++++++++
 6 files changed, 91 insertions(+), 15 deletions(-)

--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -480,7 +480,7 @@ static ssize_t node_read_meminfo(struct
 #endif
 #ifdef CONFIG_UNACCEPTED_MEMORY
 			     ,
-			     nid, K(sum_zone_node_page_state(nid, NR_UNACCEPTED))
+			     nid, K(sum_zone_node_page_state_2(nid, NR_UNACCEPTED))
 #endif
 			    );
 	len += hugetlb_report_node_meminfo(buf, len, nid);
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -148,7 +148,7 @@ static int meminfo_proc_show(struct seq_
 
 #ifdef CONFIG_UNACCEPTED_MEMORY
 	show_val_kb(m, "Unaccepted:     ",
-		    global_zone_page_state(NR_UNACCEPTED));
+		    global_zone_page_state_2(NR_UNACCEPTED));
 #endif
 
 	hugetlb_report_meminfo(m);
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -160,10 +160,14 @@ enum zone_stat_item {
 	NR_ZSPAGES,		/* allocated in zsmalloc */
 #endif
 	NR_FREE_CMA_PAGES,
+	NR_VM_ZONE_STAT_ITEMS };
+
+enum zone_stat_item_2 {
 #ifdef CONFIG_UNACCEPTED_MEMORY
 	NR_UNACCEPTED,
 #endif
-	NR_VM_ZONE_STAT_ITEMS };
+	NR_VM_ZONE_STAT_ITEMS_2
+};
 
 enum node_stat_item {
 	NR_LRU_BASE,
@@ -612,17 +616,24 @@ struct zone {
 
 	int initialized;
 
+/*
+ * There is a hole on x86_64 thanks to _pad1_, but we haven't checked other
+ * architectures, so restrict the genksyms hiding to CONFIG_X86_64. If e.g.
+ * arm64 enables this later, the kabi check will fail and we'll re-evaluate.
+ */
+#if !defined(__GENKSYMS__) || !defined(CONFIG_X86_64)
+#ifdef CONFIG_UNACCEPTED_MEMORY
+	/* Pages to be accepted. All pages on the list are MAX_ORDER */
+	struct list_head	unaccepted_pages;
+#endif
+#endif
+
 	/* Write-intensive fields used from the page allocator */
 	ZONE_PADDING(_pad1_)
 
 	/* free areas of different sizes */
 	struct free_area	free_area[MAX_ORDER];
 
-#ifdef CONFIG_UNACCEPTED_MEMORY
-	/* Pages to be accepted. All pages on the list are MAX_ORDER */
-	struct list_head	unaccepted_pages;
-#endif
-
 	/* zone flags, see below */
 	unsigned long		flags;
 
@@ -671,7 +682,14 @@ struct zone {
 	/* Zone statistics */
 	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
 	atomic_long_t		vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
+#ifndef __GENKSYMS__
+	union {
+		atomic_long_t		vm_stat_2[NR_VM_ZONE_STAT_ITEMS_2];
+		void *suse_kabi_padding;
+	};
+#else
 	void *suse_kabi_padding;
+#endif
 } ____cacheline_internodealigned_in_smp;
 
 enum pgdat_flags {
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -138,6 +138,7 @@ static inline void vm_events_fold_cpu(in
  * Zone and node-based page accounting with per cpu differentials.
  */
 extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS];
+extern atomic_long_t vm_zone_stat_2[NR_VM_ZONE_STAT_ITEMS_2];
 extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS];
 extern atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
 
@@ -169,6 +170,13 @@ static inline void zone_page_state_add(l
 	atomic_long_add(x, &vm_zone_stat[item]);
 }
 
+static inline void zone_page_state_add_2(long x, struct zone *zone,
+				 enum zone_stat_item_2 item)
+{
+	atomic_long_add(x, &zone->vm_stat_2[item]);
+	atomic_long_add(x, &vm_zone_stat_2[item]);
+}
+
 static inline void node_page_state_add(long x, struct pglist_data *pgdat,
 				 enum node_stat_item item)
 {
@@ -186,6 +194,16 @@ static inline unsigned long global_zone_
 	return x;
 }
 
+static inline unsigned long global_zone_page_state_2(enum zone_stat_item_2 item)
+{
+	long x = atomic_long_read(&vm_zone_stat_2[item]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
 static inline
 unsigned long global_node_page_state_pages(enum node_stat_item item)
 {
@@ -215,6 +233,17 @@ static inline unsigned long zone_page_st
 	return x;
 }
 
+static inline unsigned long zone_page_state_2(struct zone *zone,
+					enum zone_stat_item_2 item)
+{
+	long x = atomic_long_read(&zone->vm_stat_2[item]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
 /*
  * More accurate version that also considers the currently pending
  * deltas. For that we need to loop over all cpus to find the current
@@ -257,6 +286,8 @@ __count_numa_events(struct zone *zone, e
 
 extern unsigned long sum_zone_node_page_state(int node,
 					      enum zone_stat_item item);
+extern unsigned long sum_zone_node_page_state_2(int node,
+					      enum zone_stat_item_2 item);
 extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item);
 extern unsigned long node_page_state(struct pglist_data *pgdat,
 						enum node_stat_item item);
@@ -274,6 +305,7 @@ static inline void fold_vm_numa_events(v
 
 #ifdef CONFIG_SMP
 void __mod_zone_page_state(struct zone *, enum zone_stat_item item, long);
+void __mod_zone_page_state_2(struct zone *, enum zone_stat_item_2 item, long);
 void __inc_zone_page_state(struct page *, enum zone_stat_item);
 void __dec_zone_page_state(struct page *, enum zone_stat_item);
 
@@ -433,14 +465,14 @@ static inline const char *zone_stat_name
 #ifdef CONFIG_NUMA
 static inline const char *numa_stat_name(enum numa_stat_item item)
 {
-	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
+	return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_ZONE_STAT_ITEMS_2 +
 			   item];
 }
 #endif /* CONFIG_NUMA */
 
 static inline const char *node_stat_name(enum node_stat_item item)
 {
-	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
+	return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_ZONE_STAT_ITEMS_2 +
 			   NR_VM_NUMA_EVENT_ITEMS +
 			   item];
 }
@@ -452,7 +484,7 @@ static inline const char *lru_list_name(
 
 static inline const char *writeback_stat_name(enum writeback_stat_item item)
 {
-	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
+	return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_ZONE_STAT_ITEMS_2 +
 			   NR_VM_NUMA_EVENT_ITEMS +
 			   NR_VM_NODE_STAT_ITEMS +
 			   item];
@@ -461,7 +493,7 @@ static inline const char *writeback_stat
 #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
 static inline const char *vm_event_name(enum vm_event_item item)
 {
-	return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
+	return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_ZONE_STAT_ITEMS_2 +
 			   NR_VM_NUMA_EVENT_ITEMS +
 			   NR_VM_NODE_STAT_ITEMS +
 			   NR_VM_WRITEBACK_STAT_ITEMS +
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3834,7 +3834,7 @@ static inline long __zone_watermark_unus
 		unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
 #endif
 #ifdef CONFIG_UNACCEPTED_MEMORY
-	unusable_free += zone_page_state(z, NR_UNACCEPTED);
+	unusable_free += zone_page_state_2(z, NR_UNACCEPTED);
 #endif
 
 	return unusable_free;
@@ -9682,7 +9682,7 @@ static bool try_to_accept_memory_one(str
 	last = list_empty(&zone->unaccepted_pages);
 
 	__mod_zone_freepage_state(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
-	__mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
+	__mod_zone_page_state_2(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
 	spin_unlock_irqrestore(&zone->lock, flags);
 
 	accept_page(page, MAX_ORDER - 1);
@@ -9734,7 +9734,7 @@ static bool __free_unaccepted(struct pag
 	first = list_empty(&zone->unaccepted_pages);
 	list_add_tail(&page->lru, &zone->unaccepted_pages);
 	__mod_zone_freepage_state(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
-	__mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
+	__mod_zone_page_state_2(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
 	spin_unlock_irqrestore(&zone->lock, flags);
 
 	if (first)
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -160,6 +160,7 @@ void vm_events_fold_cpu(int cpu)
  * vm_stat contains the global counters
  */
 atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
+atomic_long_t vm_zone_stat_2[NR_VM_ZONE_STAT_ITEMS_2] __cacheline_aligned_in_smp;
 atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
 atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp;
 EXPORT_SYMBOL(vm_zone_stat);
@@ -372,6 +373,18 @@ void __mod_zone_page_state(struct zone *
 }
 EXPORT_SYMBOL(__mod_zone_page_state);
 
+void __mod_zone_page_state_2(struct zone *zone, enum zone_stat_item_2 item,
+			   long delta)
+{
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
+
+	zone_page_state_add_2(delta, zone, item);
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
+}
+
 void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
 				long delta)
 {
@@ -1003,6 +1016,19 @@ unsigned long sum_zone_node_page_state(i
 
 	return count;
 }
+
+unsigned long sum_zone_node_page_state_2(int node,
+				 enum zone_stat_item_2 item)
+{
+	struct zone *zones = NODE_DATA(node)->node_zones;
+	int i;
+	unsigned long count = 0;
+
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		count += zone_page_state_2(zones + i, item);
+
+	return count;
+}
 
 /* Determine the per node value of a numa stat item. */
 unsigned long sum_zone_numa_event_state(int node,