Blob Blame History Raw
From 282f75e33f93e46854345598f576d052d5cfe390 Mon Sep 17 00:00:00 2001
From: Fabian Vogt <fvogt@suse.de>
Date: Thu, 8 Dec 2022 12:50:12 +0100
Subject: [PATCH] Add x86-64 architecture levels (v2-v4) as architectures

The x86_64 SysV psABI defines four levels of x86_64 with certain CPU features
required for each level. Those definitions are meant to be generically useful
and recognized as such by glibc and gcc as well.

For backward-compatibility and to avoid surprises, default to building x86_64
even on v2+ capable machines.
---
 installplatform |  2 +-
 lib/rpmrc.c     | 75 +++++++++++++++++++++++++++++++++++++++++++++++++
 macros.in       |  4 +++
 rpmrc.in        | 18 ++++++++++++
 4 files changed, 98 insertions(+), 1 deletion(-)

--- installplatform
+++ installplatform
@@ -78,7 +78,7 @@ for ARCH in noarch `grep ^arch_canon $RP
 	CANONARCH=i386
 	CANONCOLOR=0
 	;;
-    x86_64|amd64|ia32e)
+    x86_64*|amd64|ia32e)
 	ISANAME=x86
 	ISABITS=64
 	CANONARCH=x86_64
--- lib/rpmrc.c
+++ lib/rpmrc.c
@@ -735,6 +735,71 @@ exit:
     return rc;
 }
 
+#	if defined(__linux__) && defined(__x86_64__)
+/* Execute CPUID with leaf `op` in EAX and subleaf `op2` in ECX; store the resulting EAX..EDX through the out pointers. */
+static inline void cpuid(uint32_t op, uint32_t op2, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
+{
+    __asm__ __volatile__ ("cpuid\n"	/* __asm__ spelling also builds under strict -std=c99/c11 */
+    : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
+    : "a" (op), "c" (op2));
+}
+
+/* From gcc's gcc/config/i386/cpuid.h */
+/* Features (%eax == 1) */
+/* %ecx */
+#define bit_SSE3	(1 << 0)
+#define bit_SSSE3	(1 << 9)
+#define bit_FMA		(1 << 12)
+#define bit_CMPXCHG16B	(1 << 13)
+#define bit_SSE4_1	(1 << 19)
+#define bit_SSE4_2	(1 << 20)
+#define bit_MOVBE	(1 << 22)
+#define bit_POPCNT	(1 << 23)
+#define bit_OSXSAVE	(1 << 27)
+#define bit_AVX		(1 << 28)
+#define bit_F16C	(1 << 29)
+
+/* Extended Features (%eax == 0x80000001) */
+/* %ecx */
+#define bit_LAHF_LM	(1 << 0)
+#define bit_LZCNT	(1 << 5)
+
+/* Extended Features (%eax == 7) */
+/* %ebx */
+#define bit_BMI		(1 << 3)
+#define bit_AVX2	(1 << 5)
+#define bit_BMI2	(1 << 8)
+#define bit_AVX512F	(1 << 16)
+#define bit_AVX512DQ	(1 << 17)
+#define bit_AVX512CD	(1 << 28)
+#define bit_AVX512BW	(1 << 30)
+#define bit_AVX512VL	(1u << 31)
+
+/* Return the highest x86-64 psABI level (1..4) whose required CPUID feature bits are all set; 1 is the baseline. */
+static int get_x86_64_level(void)
+{
+    int level = 1;
+    unsigned int op_1_ecx = 0, op_80000001_ecx = 0, op_7_ebx = 0, unused;
+    cpuid(1, 0, &unused, &unused, &op_1_ecx, &unused);
+    cpuid(0x80000001, 0, &unused, &unused, &op_80000001_ecx, &unused);
+    cpuid(7, 0, &unused, &op_7_ebx, &unused, &unused);
+
+    const unsigned int op_1_ecx_lv2 = bit_SSE3 | bit_SSSE3 | bit_CMPXCHG16B | bit_SSE4_1 | bit_SSE4_2 | bit_POPCNT;
+    if ((op_1_ecx & op_1_ecx_lv2) == op_1_ecx_lv2 && (op_80000001_ecx & bit_LAHF_LM))
+	level = 2;
+
+    /* LZCNT is reported in leaf 0x80000001 ECX bit 5; bit 5 of leaf 1 ECX is VMX, so it must not be tested there. */
+    const unsigned int op_1_ecx_lv3 = bit_FMA | bit_MOVBE | bit_OSXSAVE | bit_AVX | bit_F16C;
+    const unsigned int op_7_ebx_lv3 = bit_BMI | bit_AVX2 | bit_BMI2;
+    if (level == 2 && (op_80000001_ecx & bit_LZCNT) && (op_1_ecx & op_1_ecx_lv3) == op_1_ecx_lv3 && (op_7_ebx & op_7_ebx_lv3) == op_7_ebx_lv3)
+        level = 3;
+
+    const unsigned int op_7_ebx_lv4 = bit_AVX512F | bit_AVX512DQ | bit_AVX512CD | bit_AVX512BW | bit_AVX512VL;
+    if (level == 3 && (op_7_ebx & op_7_ebx_lv4) == op_7_ebx_lv4)
+        level = 4;
+    return level;
+}
+#	endif
 
 #	if defined(__linux__) && defined(__i386__)
 #include <setjmp.h>
@@ -1287,6 +1352,16 @@ static void defaultMachine(rpmrcCtx ctx,
 	}
 #	endif
 
+# if defined(__linux__) && defined(__x86_64__)
+	{
+	    int x86_64_level = get_x86_64_level();
+	    if (x86_64_level > 1) {
+	        strcpy(un.machine, "x86_64_vX");
+	        un.machine[8] = '0' + x86_64_level;
+	    }
+	}
+#endif
+
 	/* the uname() result goes through the arch_canon table */
 	canon = lookupInCanonTable(un.machine,
 			   ctx->tables[RPM_MACHTABLE_INSTARCH].canons,
--- macros.in
+++ macros.in
@@ -1064,6 +1064,10 @@ package or when debugging this package.\
 %ix86   i386 i486 i586 i686 pentium3 pentium4 athlon geode
 
 #------------------------------------------------------------------------------
+# arch macro for all supported x86_64 processors
+%x86_64	x86_64 x86_64_v2 x86_64_v3 x86_64_v4 amd64 em64t
+
+#------------------------------------------------------------------------------
 # arch macro for all supported 32-bit ARM processors
 %arm32	armv3l armv4b armv4l armv4tl armv5b armv5l armv5teb armv5tel armv5tejl armv6l armv6hl armv7l armv7hl armv7hnl armv8l armv8hl armv8hnl armv8hcnl
 
--- rpmrc.in
+++ rpmrc.in
@@ -22,6 +22,10 @@ optflags: athlon -O2 -g -m32 -march=athl
 optflags: geode -Os -g -m32 -march=geode
 optflags: ia64 -O2 -g -fmessage-length=0 -D_FORTIFY_SOURCE=2 -fstack-protector -funwind-tables -fasynchronous-unwind-tables
 optflags: x86_64 -O2 -g -m64 -fmessage-length=0 -D_FORTIFY_SOURCE=2 -fstack-protector -funwind-tables -fasynchronous-unwind-tables
+optflags: x86_64_v2 -O2 -g -march=x86-64-v2 -m64 -fmessage-length=0 -D_FORTIFY_SOURCE=2 -fstack-protector -funwind-tables -fasynchronous-unwind-tables
+optflags: x86_64_v3 -O2 -g -march=x86-64-v3 -m64 -fmessage-length=0 -D_FORTIFY_SOURCE=2 -fstack-protector -funwind-tables -fasynchronous-unwind-tables
+optflags: x86_64_v4 -O2 -g -march=x86-64-v4 -m64 -fmessage-length=0 -D_FORTIFY_SOURCE=2 -fstack-protector -funwind-tables -fasynchronous-unwind-tables
+
 optflags: amd64 -O2 -g
 optflags: ia32e -O2 -g
 
@@ -148,6 +152,9 @@ archcolor: s390x 2
 archcolor: ia64 2
 
 archcolor: x86_64 2
+archcolor: x86_64_v2 2
+archcolor: x86_64_v3 2
+archcolor: x86_64_v4 2
 
 archcolor: sh3 1
 archcolor: sh4 1
@@ -168,6 +175,9 @@ arch_canon:	i586:	i586	1
 arch_canon:	i486:	i486	1
 arch_canon:	i386:	i386	1
 arch_canon:	x86_64:	x86_64	1
+arch_canon:	x86_64_v2:	x86_64_v2	1
+arch_canon:	x86_64_v3:	x86_64_v3	1
+arch_canon:	x86_64_v4:	x86_64_v4	1
 arch_canon:	amd64:	amd64	1
 arch_canon:	ia32e:	ia32e	1
 arch_canon:	em64t:	em64t	1
@@ -378,6 +388,9 @@ buildarchtranslate: s390x: s390x
 buildarchtranslate: ia64: ia64
 
 buildarchtranslate: x86_64: x86_64
+buildarchtranslate: x86_64_v2: x86_64
+buildarchtranslate: x86_64_v3: x86_64
+buildarchtranslate: x86_64_v4: x86_64
 buildarchtranslate: amd64: x86_64
 buildarchtranslate: ia32e: x86_64
 
@@ -504,6 +517,9 @@ arch_compat: ia64: i686 noarch
 arch_compat: x86_64: amd64 em64t athlon noarch
 arch_compat: amd64: x86_64 em64t athlon noarch
 arch_compat: ia32e: x86_64 em64t athlon noarch
+arch_compat: x86_64_v2: x86_64 amd64 em64t athlon noarch
+arch_compat: x86_64_v3: x86_64_v2 x86_64 amd64 em64t athlon noarch
+arch_compat: x86_64_v4: x86_64_v3 x86_64_v2 x86_64 amd64 em64t athlon noarch
 
 arch_compat: sh3: noarch
 arch_compat: sh4: noarch
@@ -640,6 +656,9 @@ buildarch_compat: s390x: noarch
 
 buildarch_compat: ia64: noarch
 
+buildarch_compat: x86_64_v4: x86_64_v3
+buildarch_compat: x86_64_v3: x86_64_v2
+buildarch_compat: x86_64_v2: x86_64
 buildarch_compat: x86_64: noarch
 buildarch_compat: amd64: x86_64
 buildarch_compat: ia32e: x86_64