From 480742da43c5f58129543d0f8c84627fb5a17b68 Mon Sep 17 00:00:00 2001 From: Aleksandr Voron Date: Wed, 18 Dec 2024 11:45:50 +0100 Subject: [PATCH] [ARM] Revert #2235 "fix, build, docs: aarch64: mutex lock if the ACL kernel is not stateless" --- .github/automation/build_acl.sh | 2 +- README.md | 2 +- cmake/ACL.cmake | 2 +- src/cpu/acl/matmul/acl_matmul.cpp | 12 +----------- src/cpu/acl/matmul/acl_matmul.hpp | 2 -- 5 files changed, 4 insertions(+), 16 deletions(-) diff --git a/.github/automation/build_acl.sh b/.github/automation/build_acl.sh index 7ed588618ff..0c45cc5a291 100755 --- a/.github/automation/build_acl.sh +++ b/.github/automation/build_acl.sh @@ -28,7 +28,7 @@ source ${SCRIPT_DIR}/common_aarch64.sh ACL_CONFIG=${ACL_CONFIG:-"Release"} ACL_ROOT_DIR=${ACL_ROOT_DIR:-"${PWD}/ComputeLibrary"} -ACL_VERSION=${ACL_VERSION:-v24.11.1} +ACL_VERSION=${ACL_VERSION:-v24.09} ACL_ARCH=${ACL_ARCH:-"armv8.2-a"} ACL_REPO="https://github.com/ARM-software/ComputeLibrary.git" diff --git a/README.md b/README.md index 7030985f013..15ed426b58d 100644 --- a/README.md +++ b/README.md @@ -173,7 +173,7 @@ On a CPU based on Arm AArch64 architecture, oneDNN CPU engine can be built with machine learning applications and provides AArch64 optimized implementations of core functions. This functionality currently requires that ACL is downloaded and built separately. See [Build from Source] section of the Developer Guide for -details. oneDNN only supports Compute Library versions 24.11.1 or later. +details. oneDNN only supports Compute Library versions 24.09 or later. 
[Arm Compute Library (ACL)]: https://github.com/arm-software/ComputeLibrary diff --git a/cmake/ACL.cmake b/cmake/ACL.cmake index d619e6f9226..d149b4724f9 100644 --- a/cmake/ACL.cmake +++ b/cmake/ACL.cmake @@ -31,7 +31,7 @@ endif() find_package(ACL REQUIRED) -set(ACL_MINIMUM_VERSION "24.11.1") +set(ACL_MINIMUM_VERSION "24.09") if(ACL_FOUND) file(GLOB_RECURSE ACL_VERSION_FILE ${ACL_INCLUDE_DIR}/*/arm_compute_version.embed) diff --git a/src/cpu/acl/matmul/acl_matmul.cpp b/src/cpu/acl/matmul/acl_matmul.cpp index 0075a7234e8..4de2762a3c6 100644 --- a/src/cpu/acl/matmul/acl_matmul.cpp +++ b/src/cpu/acl/matmul/acl_matmul.cpp @@ -15,7 +15,6 @@ *******************************************************************************/ #include "cpu/acl/matmul/acl_matmul.hpp" -#include <mutex> namespace dnnl { namespace impl { @@ -176,16 +175,7 @@ status_t acl_matmul_t::execute_forward(const exec_ctx_t &ctx) const { auto src_base = CTX_IN_MEM(const data_t *, DNNL_ARG_SRC); auto wei_base = CTX_IN_MEM(const data_t *, DNNL_ARG_WEIGHTS); - const auto &amp = pd()->amp_; - - std::unique_lock<std::mutex> locker {mtx_, std::defer_lock}; - - // Some of the underlying kernels used by ACL still require some state and - // are not safe to be called in parallel with different execution contexts. - // Eventually when all kernels are truly stateless, this guard can be - // removed. 
- if (!acl_obj_->asm_gemm.has_stateless_impl()) { locker.lock(); } - + auto amp = pd()->amp_; bool is_transA = amp.is_transA; bool is_transB = amp.is_transB; bool do_transC = amp.do_transC; diff --git a/src/cpu/acl/matmul/acl_matmul.hpp b/src/cpu/acl/matmul/acl_matmul.hpp index bf22dd40c1e..ec68c4a1afc 100644 --- a/src/cpu/acl/matmul/acl_matmul.hpp +++ b/src/cpu/acl/matmul/acl_matmul.hpp @@ -17,7 +17,6 @@ #ifndef ACL_MATMUL_HPP #define ACL_MATMUL_HPP -#include <mutex> #include "common/utils.hpp" #include "cpu/acl/acl_post_ops.hpp" #include "cpu/acl/matmul/acl_matmul_utils.hpp" @@ -72,7 +71,6 @@ struct acl_matmul_t : public primitive_t { const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); } std::unique_ptr acl_obj_; - mutable std::mutex mtx_; }; // acl_matmul_t } // namespace matmul