From 5dfcf9f7359bed7108064b9756377f134cab83dd Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Tue, 15 Feb 2022 10:17:15 -0300 Subject: [PATCH] Add feature detection for ARM/MacOS (#41924) --- src/processor_arm.cpp | 78 ++++++++++++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 19 deletions(-) diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index 0804f77a3a8894..f5cc2a42a48706 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -19,6 +19,9 @@ # undef USE_DYN_GETAUXVAL # include # endif +#elif defined _CPU_AARCH64_ && defined _OS_DARWIN_ +#include +#include #endif namespace ARM { @@ -160,6 +163,8 @@ enum class CPU : uint32_t { apple_a11, apple_a12, apple_a13, + apple_a14, + apple_m1, apple_s4, apple_s5, @@ -240,6 +245,7 @@ constexpr auto armv8_3a_crypto = armv8_3a | get_feature_masks(aes, sha2); constexpr auto armv8_4a = armv8_3a | get_feature_masks(v8_4a, dit, rcpc_immo, flagm); constexpr auto armv8_4a_crypto = armv8_4a | get_feature_masks(aes, sha2); constexpr auto armv8_5a = armv8_4a | get_feature_masks(v8_5a, sb, ccdp, altnzcv, fptoint); +constexpr auto armv8_5a_crypto = armv8_5a | get_feature_masks(aes, sha2); constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16); // For ARM cores, the features required can be found in the technical reference manual @@ -342,6 +348,10 @@ constexpr auto apple_a10 = armv8a_crc_crypto | get_feature_masks(rdm); constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16); constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16); constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3); +constexpr auto apple_a14 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); +constexpr auto apple_m1 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); +// Features based on 
https://github.com/llvm/llvm-project/blob/82507f1798768280cf5d5aab95caaafbc7fe6f47/llvm/include/llvm/Support/AArch64TargetParser.def
+// and sysctl -a hw.optional
 constexpr auto apple_s4 = apple_a12;
 constexpr auto apple_s5 = apple_a12;
 
@@ -420,6 +430,8 @@ static constexpr CPUSpec cpus[] = {
     {"apple-a11", CPU::apple_a11, CPU::generic, 100000, Feature::apple_a11},
     {"apple-a12", CPU::apple_a12, CPU::generic, 100000, Feature::apple_a12},
     {"apple-a13", CPU::apple_a13, CPU::generic, 100000, Feature::apple_a13},
+    {"apple-a14", CPU::apple_a14, CPU::apple_a13, 120000, Feature::apple_a14},
+    {"apple-m1", CPU::apple_m1, CPU::apple_a14, 130000, Feature::apple_m1},
     {"apple-s4", CPU::apple_s4, CPU::generic, 100000, Feature::apple_s4},
     {"apple-s5", CPU::apple_s5, CPU::generic, 100000, Feature::apple_s5},
     {"thunderx3t110", CPU::marvell_thunderx3t110, CPU::cavium_thunderx2t99, 110000,
@@ -662,13 +674,47 @@ static constexpr CPUSpec cpus[] = {
     {"exynos-m2", CPU::samsung_exynos_m2, CPU::generic, UINT32_MAX, Feature::samsung_exynos_m2},
     {"exynos-m3", CPU::samsung_exynos_m3, CPU::generic, 0, Feature::samsung_exynos_m3},
     {"exynos-m4", CPU::samsung_exynos_m4, CPU::generic, 0, Feature::samsung_exynos_m4},
-    {"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000,
-     Feature::samsung_exynos_m5},
+    {"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000, Feature::samsung_exynos_m5},
     {"apple-a7", CPU::apple_a7, CPU::generic, 0, Feature::apple_a7},
 };
 #endif
 static constexpr size_t ncpu_names = sizeof(cpus) / sizeof(cpus[0]);
 
+static inline const CPUSpec *find_cpu(uint32_t cpu)
+{
+    return ::find_cpu(cpu, cpus, ncpu_names);
+}
+
+static inline const CPUSpec *find_cpu(llvm::StringRef name)
+{
+    return ::find_cpu(name, cpus, ncpu_names);
+}
+
+static inline const char *find_cpu_name(uint32_t cpu)
+{
+    return ::find_cpu_name(cpu, cpus, ncpu_names);
+}
+
+#if defined _CPU_AARCH64_ && defined _OS_DARWIN_
+
+// Host CPU detection for Darwin/AArch64, based on the sysctl CPU brand string.
+static NOINLINE std::pair> _get_host_cpu()
+{
+    // Zero-filled and queried one byte short, so the buffer is always NUL-terminated.
+    char buffer[128] = {0};
+    size_t bufferlen = sizeof(buffer) - 1;
+    if (sysctlbyname("machdep.cpu.brand_string", buffer, &bufferlen, NULL, 0) != 0)
+        buffer[0] = '\0'; // query failed: treat the brand as unknown
+
+    // Prefix match covers "Apple M1", "Apple M1 Pro" and "Apple M1 Max".
+    if (strncmp(buffer, "Apple M1", sizeof("Apple M1") - 1) == 0)
+        return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
+    // Unrecognised brand string: fall back to apple_m1.
+    return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
+}
+
+#else
+
 // auxval reader
 
 #ifndef AT_HWCAP
@@ -974,7 +1020,7 @@ static CPU get_cpu_name(CPUID cpuid)
         default: return CPU::generic;
         }
     case 0x61: // 'a': Apple
-        // https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/arm/cpuid.h.auto.html
+        // https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html
         switch (cpuid.part) {
         case 0x0: // Swift
             return CPU::apple_swift;
@@ -1002,6 +1048,12 @@ static CPU get_cpu_name(CPUID cpuid)
         case 0x12: // Lightning
         case 0x13: // Thunder
             return CPU::apple_a13;
+        case 0x20: // Icestorm
+        case 0x21: // Firestorm
+            return CPU::apple_a14;
+        case 0x22: // Icestorm m1
+        case 0x23: // Firestorm m1
+            return CPU::apple_m1;
         default: return CPU::generic;
         }
     case 0x68: // 'h': Huaxintong Semiconductor
@@ -1019,6 +1071,9 @@ static CPU get_cpu_name(CPUID cpuid)
     }
 }
 
+
+
+
 namespace {
 
 struct arm_arch {
@@ -1062,21 +1117,6 @@ static arm_arch get_elf_arch(void)
 #endif
 }
 
-static inline const CPUSpec *find_cpu(uint32_t cpu)
-{
-    return ::find_cpu(cpu, cpus, ncpu_names);
-}
-
-static inline const CPUSpec *find_cpu(llvm::StringRef name)
-{
-    return ::find_cpu(name, cpus, ncpu_names);
-}
-
-static inline const char *find_cpu_name(uint32_t cpu)
-{
-    return ::find_cpu_name(cpu, cpus, ncpu_names);
-}
-
 static arm_arch feature_arch_version(const FeatureList &feature)
 {
 #ifdef _CPU_AARCH64_
@@ -1303,9 +1343,9 @@ static NOINLINE std::pair> _get_host_cpu()
     }
     // Ignore feature bits that we are not interested in.
mask_features(feature_masks, &features[0]); - return std::make_pair(cpu, features); } +#endif static inline const std::pair> &get_host_cpu() {