Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add feature detection for ARM/MacOS #41924

Merged
merged 22 commits into from
Feb 15, 2022
Merged
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 45 additions & 1 deletion src/processor_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ enum class CPU : uint32_t {
apple_a11,
apple_a12,
apple_a13,
apple_a14,
apple_m1,
apple_s4,
apple_s5,

Expand Down Expand Up @@ -240,6 +242,7 @@ constexpr auto armv8_3a_crypto = armv8_3a | get_feature_masks(aes, sha2);
constexpr auto armv8_4a = armv8_3a | get_feature_masks(v8_4a, dit, rcpc_immo, flagm);
constexpr auto armv8_4a_crypto = armv8_4a | get_feature_masks(aes, sha2);
constexpr auto armv8_5a = armv8_4a | get_feature_masks(v8_5a, sb, ccdp, altnzcv, fptoint);
constexpr auto armv8_5a_crypto = armv8_5a | get_feature_masks(aes, sha2);
constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16);

// For ARM cores, the features required can be found in the technical reference manual
Expand Down Expand Up @@ -342,6 +345,10 @@ constexpr auto apple_a10 = armv8a_crc_crypto | get_feature_masks(rdm);
constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16);
constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16);
constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3);
constexpr auto apple_a14 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3);
constexpr auto apple_m1 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3);
// The apple_a14 and apple_m1 feature sets above are based on
// https://github.com/llvm/llvm-project/blob/82507f1798768280cf5d5aab95caaafbc7fe6f47/llvm/include/llvm/Support/AArch64TargetParser.def
// and on the output of `sysctl -a hw.optional`.
constexpr auto apple_s4 = apple_a12;
constexpr auto apple_s5 = apple_a12;

Expand Down Expand Up @@ -420,6 +427,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
{"apple-a11", CPU::apple_a11, CPU::generic, 100000, Feature::apple_a11},
{"apple-a12", CPU::apple_a12, CPU::generic, 100000, Feature::apple_a12},
{"apple-a13", CPU::apple_a13, CPU::generic, 100000, Feature::apple_a13},
{"apple-a14", CPU::apple_a14, CPU::apple_a13, 120000, Feature::apple_a14},
{"apple-m1", CPU::apple_m1, CPU::apple_a14, 130000, Feature::apple_m1},
{"apple-s4", CPU::apple_s4, CPU::generic, 100000, Feature::apple_s4},
{"apple-s5", CPU::apple_s5, CPU::generic, 100000, Feature::apple_s5},
{"thunderx3t110", CPU::marvell_thunderx3t110, CPU::cavium_thunderx2t99, 110000,
Expand Down Expand Up @@ -1002,6 +1011,12 @@ static CPU get_cpu_name(CPUID cpuid)
case 0x12: // Lightning
case 0x13: // Thunder
return CPU::apple_a13;
case 0x20: // Icestorm
case 0x21: // Firestorm
return CPU::apple_a14;
case 0x22: // Icestorm m1
case 0x23: // Firestorm m1
return CPU::apple_m1;
default: return CPU::generic;
}
case 0x68: // 'h': Huaxintong Semiconductor
Expand Down Expand Up @@ -1181,6 +1196,35 @@ static void shrink_big_little(std::vector<std::pair<uint32_t,CPUID>> &list,
}
}

#if defined _CPU_AARCH64_ && defined _OS_DARWIN_
static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
{
FeatureList<feature_sz> features = {};
Copy link
Contributor

@yuyichao yuyichao Dec 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return std::make_pair(CPU::apple_m1, Feature::apple_m1);

There's no need to go through the lookup. I doubt the ELF arch lookup applies to Apple (and if anything, it won't be an ELF arch...), and you'll never hit the empty case anyway. There's no need to look up features from the CPU, since you know what it is. There's also no need to mask any features, since the extra features come from auxv and you aren't getting them from there.

You should be able to move arm_arch (along with the namespace), get_elf_arch, feature_arch_version, generic_for_arch, check_cpu_arch_ver, shrink_big_little, into the ifdef below as well.

Edit: also you can ifdef out old line 672 to old line 1020 but please keep the change you made in that range since it's for linux.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wrote some simpler, smaller logic that also shouldn't make it too hard to add new Apple cores when they launch someday.

// Describe the host with a hardcoded CPUID instead of probing the system:
// implementer 0x61, variant 0, part 0x23. This is the Firestorm core data, based on
// https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html
// The CPU is still resolved through get_cpu_name() rather than being named directly,
// so supporting a new Apple core only requires a new part number there.
CPUID info = {
    uint8_t(0x61),
    uint8_t(0),
    uint16_t(0x23)
};
// Exactly one CPU is ever identified on this path, so there is no candidate list to
// build and no "nothing detected" case that would need an architecture-generic fallback.
const auto cpu = static_cast<uint32_t>(get_cpu_name(info));
// Start from the feature set that find_cpu() reports for the identified CPU.
features = find_cpu(cpu)->features;
// Ignore feature bits that we are not interested in.
mask_features(feature_masks, &features[0]);
return std::make_pair(cpu, features);
}
#else
static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
{
FeatureList<feature_sz> features = {};
Expand Down Expand Up @@ -1303,9 +1347,9 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
}
// Ignore feature bits that we are not interested in.
mask_features(feature_masks, &features[0]);

return std::make_pair(cpu, features);
}
#endif

static inline const std::pair<uint32_t,FeatureList<feature_sz>> &get_host_cpu()
{
Expand Down