From: Vlastimil Babka <vbabka@suse.cz>
Subject: kabi: fix struct zone kabi after adding unaccepted_pages and NR_UNACCEPTED
Patch-mainline: Never, KABI
References: jsc#PED-7167 bsc#1218643 bsc#1221338 bsc#1220114
To add unaccepted_pages, we can move it to the hole before _pad1_. Keep exposing
it to the kabi checker outside of x86_64, so that the hole assumption is forced
to be rechecked in case e.g. arm64 tries to enable it later.
Adding NR_UNACCEPTED to enum zone_stat_item breaks KABI because it increments
NR_VM_ZONE_STAT_ITEMS and enlarges zone.vm_stat[], which shifts the offset of
zone.vm_numa_event[]. This layout is visible to everyone, since most of the
helpers are static inline, etc.
Solve the KABI issue by creating zone_stat_item_2 for NR_UNACCEPTED, adding
vm_stat_2 to the end of struct zone and duplicating just enough helpers to work
with NR_UNACCEPTED. Also vm_zone_stat_2 is added for the global counters.
Since this is not a hot counter and updates are done under zone->lock anyway,
we can skip the pcp vmstat diffs. Also ignore !CONFIG_SMP and !CONFIG_NUMA
helper variants.
Caveat: any future patches adding code that uses NR_UNACCEPTED with the normal
helpers will still compile, but will modify NR_FREE_PAGES instead, because the
two enum constants share the same numeric value and C enum typing is not strong
enough to catch the mismatch.
In vmstat_text[] we add the new field as the very last one, and adjust the
vmstat, node vmstat and zoneinfo printing to include the field.
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
drivers/base/node.c | 7 ++++++-
fs/proc/meminfo.c | 2 +-
include/linux/mmzone.h | 30 ++++++++++++++++++++++++------
include/linux/vmstat.h | 42 ++++++++++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 6 +++---
mm/vmstat.c | 46 ++++++++++++++++++++++++++++++++++++++++++----
6 files changed, 118 insertions(+), 15 deletions(-)
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -480,7 +480,7 @@ static ssize_t node_read_meminfo(struct
#endif
#ifdef CONFIG_UNACCEPTED_MEMORY
,
- nid, K(sum_zone_node_page_state(nid, NR_UNACCEPTED))
+ nid, K(sum_zone_node_page_state_2(nid, NR_UNACCEPTED))
#endif
);
len += hugetlb_report_node_meminfo(buf, len, nid);
@@ -540,6 +540,11 @@ static ssize_t node_read_vmstat(struct d
pages);
}
+ for (i = 0; i < NR_VM_ZONE_STAT_ITEMS_2; i++)
+ len += sysfs_emit_at(buf, len, "%s %lu\n",
+ zone_stat_name_2(i),
+ sum_zone_node_page_state_2(nid, i));
+
return len;
}
static DEVICE_ATTR(vmstat, 0444, node_read_vmstat, NULL);
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -148,7 +148,7 @@ static int meminfo_proc_show(struct seq_
#ifdef CONFIG_UNACCEPTED_MEMORY
show_val_kb(m, "Unaccepted: ",
- global_zone_page_state(NR_UNACCEPTED));
+ global_zone_page_state_2(NR_UNACCEPTED));
#endif
hugetlb_report_meminfo(m);
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -160,10 +160,14 @@ enum zone_stat_item {
NR_ZSPAGES, /* allocated in zsmalloc */
#endif
NR_FREE_CMA_PAGES,
+ NR_VM_ZONE_STAT_ITEMS };
+
+enum zone_stat_item_2 {
#ifdef CONFIG_UNACCEPTED_MEMORY
NR_UNACCEPTED,
#endif
- NR_VM_ZONE_STAT_ITEMS };
+ NR_VM_ZONE_STAT_ITEMS_2
+};
enum node_stat_item {
NR_LRU_BASE,
@@ -612,17 +616,24 @@ struct zone {
int initialized;
+/*
+ * There is a hole on x86_64 thanks to _pad1_ but haven't checked other
+ * architectures so restrict this to CONFIG_X86_64. In case we later enable this
+ * on e.g. arm64, kabi check will fail and we'll need to re-evaluate.
+ */
+#if !defined(__GENKSYMS__) && defined(CONFIG_X86_64)
+#ifdef CONFIG_UNACCEPTED_MEMORY
+ /* Pages to be accepted. All pages on the list are MAX_ORDER */
+ struct list_head unaccepted_pages;
+#endif
+#endif
+
/* Write-intensive fields used from the page allocator */
ZONE_PADDING(_pad1_)
/* free areas of different sizes */
struct free_area free_area[MAX_ORDER];
-#ifdef CONFIG_UNACCEPTED_MEMORY
- /* Pages to be accepted. All pages on the list are MAX_ORDER */
- struct list_head unaccepted_pages;
-#endif
-
/* zone flags, see below */
unsigned long flags;
@@ -671,7 +682,14 @@ struct zone {
/* Zone statistics */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
+#ifndef __GENKSYMS__
+ union {
+ atomic_long_t vm_stat_2[NR_VM_ZONE_STAT_ITEMS_2];
+ void *suse_kabi_padding;
+ };
+#else
void *suse_kabi_padding;
+#endif
} ____cacheline_internodealigned_in_smp;
enum pgdat_flags {
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -138,6 +138,7 @@ static inline void vm_events_fold_cpu(in
* Zone and node-based page accounting with per cpu differentials.
*/
extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS];
+extern atomic_long_t vm_zone_stat_2[NR_VM_ZONE_STAT_ITEMS_2];
extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS];
extern atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
@@ -169,6 +170,13 @@ static inline void zone_page_state_add(l
atomic_long_add(x, &vm_zone_stat[item]);
}
+static inline void zone_page_state_add_2(long x, struct zone *zone,
+ enum zone_stat_item_2 item)
+{
+ atomic_long_add(x, &zone->vm_stat_2[item]);
+ atomic_long_add(x, &vm_zone_stat_2[item]);
+}
+
static inline void node_page_state_add(long x, struct pglist_data *pgdat,
enum node_stat_item item)
{
@@ -186,6 +194,16 @@ static inline unsigned long global_zone_
return x;
}
+static inline unsigned long global_zone_page_state_2(enum zone_stat_item_2 item)
+{
+ long x = atomic_long_read(&vm_zone_stat_2[item]);
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
+}
+
static inline
unsigned long global_node_page_state_pages(enum node_stat_item item)
{
@@ -215,6 +233,17 @@ static inline unsigned long zone_page_st
return x;
}
+static inline unsigned long zone_page_state_2(struct zone *zone,
+ enum zone_stat_item_2 item)
+{
+ long x = atomic_long_read(&zone->vm_stat_2[item]);
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
+}
+
/*
* More accurate version that also considers the currently pending
* deltas. For that we need to loop over all cpus to find the current
@@ -257,6 +286,8 @@ __count_numa_events(struct zone *zone, e
extern unsigned long sum_zone_node_page_state(int node,
enum zone_stat_item item);
+extern unsigned long sum_zone_node_page_state_2(int node,
+ enum zone_stat_item_2 item);
extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item);
extern unsigned long node_page_state(struct pglist_data *pgdat,
enum node_stat_item item);
@@ -274,6 +305,7 @@ static inline void fold_vm_numa_events(v
#ifdef CONFIG_SMP
void __mod_zone_page_state(struct zone *, enum zone_stat_item item, long);
+void __mod_zone_page_state_2(struct zone *, enum zone_stat_item_2 item, long);
void __inc_zone_page_state(struct page *, enum zone_stat_item);
void __dec_zone_page_state(struct page *, enum zone_stat_item);
@@ -469,6 +501,16 @@ static inline const char *vm_event_name(
}
#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
+static inline const char *zone_stat_name_2(enum zone_stat_item_2 item)
+{
+ return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
+ NR_VM_NUMA_EVENT_ITEMS +
+ NR_VM_NODE_STAT_ITEMS +
+ NR_VM_WRITEBACK_STAT_ITEMS +
+ NR_VM_EVENT_ITEMS +
+ item];
+}
+
#ifdef CONFIG_MEMCG
void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3835,7 +3835,7 @@ static inline long __zone_watermark_unus
unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
#endif
#ifdef CONFIG_UNACCEPTED_MEMORY
- unusable_free += zone_page_state(z, NR_UNACCEPTED);
+ unusable_free += zone_page_state_2(z, NR_UNACCEPTED);
#endif
return unusable_free;
@@ -9709,7 +9709,7 @@ static bool try_to_accept_memory_one(str
last = list_empty(&zone->unaccepted_pages);
__mod_zone_freepage_state(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
- __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
+ __mod_zone_page_state_2(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
spin_unlock_irqrestore(&zone->lock, flags);
accept_page(page, MAX_ORDER - 1);
@@ -9761,7 +9761,7 @@ static bool __free_unaccepted(struct pag
first = list_empty(&zone->unaccepted_pages);
list_add_tail(&page->lru, &zone->unaccepted_pages);
__mod_zone_freepage_state(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
- __mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
+ __mod_zone_page_state_2(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
spin_unlock_irqrestore(&zone->lock, flags);
if (first)
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -161,6 +161,7 @@ void vm_events_fold_cpu(int cpu)
* vm_stat contains the global counters
*/
atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
+atomic_long_t vm_zone_stat_2[NR_VM_ZONE_STAT_ITEMS_2] __cacheline_aligned_in_smp;
atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_zone_stat);
@@ -373,6 +374,18 @@ void __mod_zone_page_state(struct zone *
}
EXPORT_SYMBOL(__mod_zone_page_state);
+void __mod_zone_page_state_2(struct zone *zone, enum zone_stat_item_2 item,
+ long delta)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
+
+ zone_page_state_add_2(delta, zone, item);
+
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
+}
+
void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
long delta)
{
@@ -1005,6 +1018,19 @@ unsigned long sum_zone_node_page_state(i
return count;
}
+unsigned long sum_zone_node_page_state_2(int node,
+ enum zone_stat_item_2 item)
+{
+ struct zone *zones = NODE_DATA(node)->node_zones;
+ int i;
+ unsigned long count = 0;
+
+ for (i = 0; i < MAX_NR_ZONES; i++)
+ count += zone_page_state_2(zones + i, item);
+
+ return count;
+}
+
/* Determine the per node value of a numa stat item. */
unsigned long sum_zone_numa_event_state(int node,
enum numa_stat_item item)
@@ -1181,9 +1207,6 @@ const char * const vmstat_text[] = {
"nr_zspages",
#endif
"nr_free_cma",
-#ifdef CONFIG_UNACCEPTED_MEMORY
- "nr_unaccepted",
-#endif
/* enum numa_stat_item counters */
#ifdef CONFIG_NUMA
@@ -1390,6 +1413,11 @@ const char * const vmstat_text[] = {
"direct_map_level3_splits",
#endif
#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
+
+ /* enum zone_stat_item_2 counters */
+#ifdef CONFIG_UNACCEPTED_MEMORY
+ "nr_unaccepted",
+#endif
};
#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
@@ -1699,6 +1727,10 @@ static void zoneinfo_show_print(struct s
seq_printf(m, "\n %-12s %lu", zone_stat_name(i),
zone_page_state(zone, i));
+ for (i = 0; i < NR_VM_ZONE_STAT_ITEMS_2; i++)
+ seq_printf(m, "\n %-12s %lu", zone_stat_name_2(i),
+ zone_page_state_2(zone, i));
+
#ifdef CONFIG_NUMA
for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
seq_printf(m, "\n %-12s %lu", numa_stat_name(i),
@@ -1760,7 +1792,8 @@ static const struct seq_operations zonei
NR_VM_NODE_STAT_ITEMS + \
NR_VM_WRITEBACK_STAT_ITEMS + \
(IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
- NR_VM_EVENT_ITEMS : 0))
+ NR_VM_EVENT_ITEMS : 0) + \
+ NR_VM_ZONE_STAT_ITEMS_2)
static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
@@ -1802,6 +1835,11 @@ static void *vmstat_start(struct seq_fil
v[PGPGIN] /= 2; /* sectors -> kbytes */
v[PGPGOUT] /= 2;
#endif
+ v += NR_VM_EVENT_ITEMS;
+
+ for (i = 0; i < NR_VM_ZONE_STAT_ITEMS_2; i++)
+ v[i] = global_zone_page_state_2(i);
+
return (unsigned long *)m->private + *pos;
}