Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add feature detection for ARM/MacOS #41924

Merged
merged 22 commits into from
Feb 15, 2022
Merged
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 55 additions & 19 deletions src/processor_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
# undef USE_DYN_GETAUXVAL
# include <sys/auxv.h>
# endif
#elif defined _CPU_AARCH64_ && defined _OS_DARWIN_
#include <sys/sysctl.h>
#include <string.h>
#endif

namespace ARM {
Expand Down Expand Up @@ -160,6 +163,8 @@ enum class CPU : uint32_t {
apple_a11,
apple_a12,
apple_a13,
apple_a14,
apple_m1,
apple_s4,
apple_s5,

Expand Down Expand Up @@ -240,6 +245,7 @@ constexpr auto armv8_3a_crypto = armv8_3a | get_feature_masks(aes, sha2);
// Cumulative architecture-level feature sets: each level ORs the masks it introduces
// onto the previous level, and each `_crypto` variant adds `aes` and `sha2` on top of
// the corresponding base level.
constexpr auto armv8_4a = armv8_3a | get_feature_masks(v8_4a, dit, rcpc_immo, flagm);
constexpr auto armv8_4a_crypto = armv8_4a | get_feature_masks(aes, sha2);
constexpr auto armv8_5a = armv8_4a | get_feature_masks(v8_5a, sb, ccdp, altnzcv, fptoint);
constexpr auto armv8_5a_crypto = armv8_5a | get_feature_masks(aes, sha2);
constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16);

// For ARM cores, the features required can be found in the technical reference manual
Expand Down Expand Up @@ -342,6 +348,10 @@ constexpr auto apple_a10 = armv8a_crc_crypto | get_feature_masks(rdm);
// Per-core feature sets for Apple CPUs: an architecture baseline plus the extra
// feature masks each core provides.
constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16);
constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16);
constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3);
constexpr auto apple_a14 = armv8_5a_crypto | get_feature_masks(dotprod, fp16fml, fullfp16, sha3);
// Same feature mask as apple_a14.
constexpr auto apple_m1 = apple_a14;
// Features based on https://github.com/llvm/llvm-project/blob/82507f1798768280cf5d5aab95caaafbc7fe6f47/llvm/include/llvm/Support/AArch64TargetParser.def
// and sysctl -a hw.optional
constexpr auto apple_s4 = apple_a12;
constexpr auto apple_s5 = apple_a12;

Expand Down Expand Up @@ -420,6 +430,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
{"apple-a11", CPU::apple_a11, CPU::generic, 100000, Feature::apple_a11},
{"apple-a12", CPU::apple_a12, CPU::generic, 100000, Feature::apple_a12},
{"apple-a13", CPU::apple_a13, CPU::generic, 100000, Feature::apple_a13},
{"apple-a14", CPU::apple_a14, CPU::apple_a13, 120000, Feature::apple_a14},
{"apple-m1", CPU::apple_m1, CPU::apple_a14, 130000, Feature::apple_m1},
{"apple-s4", CPU::apple_s4, CPU::generic, 100000, Feature::apple_s4},
{"apple-s5", CPU::apple_s5, CPU::generic, 100000, Feature::apple_s5},
{"thunderx3t110", CPU::marvell_thunderx3t110, CPU::cavium_thunderx2t99, 110000,
Expand Down Expand Up @@ -662,13 +674,43 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
{"exynos-m2", CPU::samsung_exynos_m2, CPU::generic, UINT32_MAX, Feature::samsung_exynos_m2},
{"exynos-m3", CPU::samsung_exynos_m3, CPU::generic, 0, Feature::samsung_exynos_m3},
{"exynos-m4", CPU::samsung_exynos_m4, CPU::generic, 0, Feature::samsung_exynos_m4},
{"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000,
Feature::samsung_exynos_m5},
{"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000, Feature::samsung_exynos_m5},
{"apple-a7", CPU::apple_a7, CPU::generic, 0, Feature::apple_a7},
};
#endif
// Number of entries in the `cpus` table (total array size divided by one element's size).
static constexpr size_t ncpu_names = sizeof(cpus) / sizeof(*cpus);

// Look up a CPU spec by numeric CPU id.
// Forwards to the global-scope `::find_cpu`, supplying this file's `cpus` table
// and its entry count.
static inline const CPUSpec<CPU, feature_sz> *find_cpu(uint32_t cpu)
{
    const CPUSpec<CPU, feature_sz> *spec = ::find_cpu(cpu, cpus, ncpu_names);
    return spec;
}

// Look up a CPU spec by CPU name.
// Forwards to the global-scope `::find_cpu`, supplying this file's `cpus` table
// and its entry count.
static inline const CPUSpec<CPU, feature_sz> *find_cpu(llvm::StringRef name)
{
    const CPUSpec<CPU, feature_sz> *spec = ::find_cpu(name, cpus, ncpu_names);
    return spec;
}

static inline const char *find_cpu_name(uint32_t cpu)
{
return ::find_cpu_name(cpu, cpus, ncpu_names);
}

#if defined _CPU_AARCH64_ && defined _OS_DARWIN_

// Host CPU detection for the AArch64 + Darwin build (see the enclosing #if).
// Returns a (CPU id, feature list) pair; as written, every path returns
// CPU::apple_m1 together with Feature::apple_m1.
static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
{
char buffer[128];
size_t bufferlen = 128;
// Read the `machdep.cpu.brand_string` sysctl into the 128-byte stack buffer.
// NOTE(review): the return value of sysctlbyname is not checked and `buffer` is not
// initialized beforehand, so a failed call would leave the strcmp below reading
// indeterminate bytes.
sysctlbyname("machdep.cpu.brand_string",&buffer,&bufferlen,NULL,0);
Copy link
Contributor

@yuyichao yuyichao Dec 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good function to keep for future reference, but the fallback should be CPU::apple_m1. As the code is currently written, the next generation of chip will be detected as generic and I don't think that's desired. The linux version gets around this as much as possible by doing a full feature detection (so the only thing missing would be scheduling model that we can't do that much about...) but there's nothing like that here. I highly doubt apple will release a new processor for mac that has fewer userspace CPU features than M1 so it should be safe to assume so. And it seems to be what other projects assumes as well.


// Both branches return the same apple_m1 result, so a brand string other than
// "Apple M1" also resolves to apple_m1 rather than to a generic CPU.
if(strcmp(buffer,"Apple M1") == 0)
vchuravy marked this conversation as resolved.
Show resolved Hide resolved
return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
else
return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
}

#else

// auxval reader

#ifndef AT_HWCAP
Expand Down Expand Up @@ -974,7 +1016,7 @@ static CPU get_cpu_name(CPUID cpuid)
default: return CPU::generic;
}
case 0x61: // 'a': Apple
// https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/arm/cpuid.h.auto.html
// https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html
switch (cpuid.part) {
case 0x0: // Swift
return CPU::apple_swift;
Expand Down Expand Up @@ -1002,6 +1044,12 @@ static CPU get_cpu_name(CPUID cpuid)
case 0x12: // Lightning
case 0x13: // Thunder
return CPU::apple_a13;
case 0x20: // Icestorm
case 0x21: // Firestorm
return CPU::apple_a14;
case 0x22: // Icestorm m1
case 0x23: // Firestorm m1
return CPU::apple_m1;
default: return CPU::generic;
}
case 0x68: // 'h': Huaxintong Semiconductor
Expand All @@ -1019,6 +1067,9 @@ static CPU get_cpu_name(CPUID cpuid)
}
}




namespace {

struct arm_arch {
Expand Down Expand Up @@ -1062,21 +1113,6 @@ static arm_arch get_elf_arch(void)
#endif
}

// Look up a CPU spec by numeric CPU id.
// Forwards to the global-scope `::find_cpu`, supplying this file's `cpus` table
// and its entry count.
static inline const CPUSpec<CPU, feature_sz> *find_cpu(uint32_t cpu)
{
    const CPUSpec<CPU, feature_sz> *spec = ::find_cpu(cpu, cpus, ncpu_names);
    return spec;
}

// Look up a CPU spec by CPU name.
// Forwards to the global-scope `::find_cpu`, supplying this file's `cpus` table
// and its entry count.
static inline const CPUSpec<CPU, feature_sz> *find_cpu(llvm::StringRef name)
{
    const CPUSpec<CPU, feature_sz> *spec = ::find_cpu(name, cpus, ncpu_names);
    return spec;
}

static inline const char *find_cpu_name(uint32_t cpu)
{
return ::find_cpu_name(cpu, cpus, ncpu_names);
}

static arm_arch feature_arch_version(const FeatureList<feature_sz> &feature)
{
#ifdef _CPU_AARCH64_
Expand Down Expand Up @@ -1303,9 +1339,9 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
}
// Ignore feature bits that we are not interested in.
mask_features(feature_masks, &features[0]);

return std::make_pair(cpu, features);
}
#endif

static inline const std::pair<uint32_t,FeatureList<feature_sz>> &get_host_cpu()
{
Expand Down