Skip to content

Commit

Permalink
fix, build, docs: aarch64: mutex lock if the ACL kernel is not stateless
Browse files Browse the repository at this point in the history
This feature depends on ACL >= 24.11.1, so the build system and docs have
been updated to require that version.

Signed-off-by: Siddhartha Menon <[email protected]>
(cherry picked from commit f30310e)
Sqvid authored and mgouicem committed Dec 4, 2024

Verified

This commit was signed with the committer’s verified signature.
codebytere Shelley Vohr
1 parent a79a486 commit 4d962e7
Showing 5 changed files with 16 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/automation/build_acl.sh
Original file line number Diff line number Diff line change
@@ -28,7 +28,7 @@ source ${SCRIPT_DIR}/common_aarch64.sh

ACL_CONFIG=${ACL_CONFIG:-"Release"}
ACL_ROOT_DIR=${ACL_ROOT_DIR:-"${PWD}/ComputeLibrary"}
ACL_VERSION=${ACL_VERSION:-v24.09}
ACL_VERSION=${ACL_VERSION:-v24.11.1}
ACL_ARCH=${ACL_ARCH:-"armv8.2-a"}
ACL_REPO="https://github.com/ARM-software/ComputeLibrary.git"

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -173,7 +173,7 @@ On a CPU based on Arm AArch64 architecture, oneDNN CPU engine can be built with
machine learning applications and provides AArch64 optimized implementations
of core functions. This functionality currently requires that ACL is downloaded
and built separately. See [Build from Source] section of the Developer Guide for
details. oneDNN only supports Compute Library versions 24.08.1 or later.
details. oneDNN only supports Compute Library versions 24.11.1 or later.

[Arm Compute Library (ACL)]: https://github.com/arm-software/ComputeLibrary

2 changes: 1 addition & 1 deletion cmake/ACL.cmake
Original file line number Diff line number Diff line change
@@ -31,7 +31,7 @@ endif()

find_package(ACL REQUIRED)

set(ACL_MINIMUM_VERSION "24.08.1")
set(ACL_MINIMUM_VERSION "24.11.1")

if(ACL_FOUND)
file(GLOB_RECURSE ACL_VERSION_FILE ${ACL_INCLUDE_DIR}/*/arm_compute_version.embed)
13 changes: 11 additions & 2 deletions src/cpu/aarch64/matmul/acl_matmul.cpp
Original file line number Diff line number Diff line change
@@ -15,6 +15,7 @@
*******************************************************************************/

#include "cpu/aarch64/matmul/acl_matmul.hpp"
#include <mutex>

namespace dnnl {
namespace impl {
@@ -171,12 +172,20 @@ status_t acl_matmul_t::pd_t::init(engine_t *engine) {

template <bool IsFixedFormat>
status_t acl_matmul_t::execute_forward(const exec_ctx_t &ctx) const {

status_t status = status::success;
auto src_base = CTX_IN_MEM(const data_t *, DNNL_ARG_SRC);
auto wei_base = CTX_IN_MEM(const data_t *, DNNL_ARG_WEIGHTS);

auto amp = pd()->amp_;
const auto &amp = pd()->amp_;

std::unique_lock<std::mutex> locker {mtx_, std::defer_lock};

// Some of the underlying kernels used by ACL still rely on internal state
// and are not safe to call in parallel with different execution contexts.
// Once all kernels are truly stateless, this guard can be removed.
if (!acl_obj_->asm_gemm.has_stateless_impl()) { locker.lock(); }

bool is_transA = amp.is_transA;
bool is_transB = amp.is_transB;
bool do_transC = amp.do_transC;
2 changes: 2 additions & 0 deletions src/cpu/aarch64/matmul/acl_matmul.hpp
Original file line number Diff line number Diff line change
@@ -17,6 +17,7 @@
#ifndef ACL_MATMUL_HPP
#define ACL_MATMUL_HPP

#include <mutex>
#include "common/utils.hpp"
#include "cpu/aarch64/acl_post_ops.hpp"
#include "cpu/aarch64/matmul/acl_matmul_utils.hpp"
@@ -71,6 +72,7 @@ struct acl_matmul_t : public primitive_t {
const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }

std::unique_ptr<acl_matmul_obj_t> acl_obj_;
mutable std::mutex mtx_;
}; // acl_matmul_t

} // namespace matmul

0 comments on commit 4d962e7

Please sign in to comment.