Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OneDNN rls-v3.6.2 #2240

Merged
merged 25 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
db5e699
cpu : aarch64 : reorder : reenabled bf16 jit uni reorders
Shreyas-fuj Sep 20, 2024
ec935d0
cpu : aarch64 : reorder : zp correction for 4d shapes (#2224)
Shreyas-fuj Nov 26, 2024
d66a391
aarch64: fix jit_brgemm warnings
taoye9 Sep 24, 2024
583215d
aarch64: fix out-of-bound warnings of deconvolution
taoye9 Sep 25, 2024
b3be239
cpu: aarch64: fix acl matmul dim guard for 4d tensor broadcast
taoye9 Oct 15, 2024
872ecac
src: cpu: aarch64: Enable jit bf16 -> f32 reorder (#2206)
aditew01 Nov 19, 2024
b8bdd63
cpu: aarch64: enable bf16f32 matmul
aditew01 Nov 8, 2024
373bc5b
test: aarch64: Fix unimplemented error when --cpu-isa-hints=prefer_ym…
Ryo-not-rio Nov 19, 2024
0de7263
cpu: aarch64: Re-enable ACL indirect conv for BF16
fadara01 Nov 18, 2024
f36b049
cpu: aarch64: Expand brgemm aarch64 unsupported cases handling mechan…
Radu2k Sep 30, 2024
ac9c3d0
cpu: aarch64: brgemm: Fix unimplemented conditions for brgemm (#2148)
Ryo-not-rio Oct 4, 2024
a13e099
github: workflows: harden GitHub actions
step-security-bot Sep 20, 2024
a37c19d
github: workflows: Add macos smoke tests
theComputeKid Sep 23, 2024
a477aef
cpu: aarch64: turn on ci DONEDNN_WERROR
taoye9 Sep 25, 2024
4273854
github: workflows: Add initial linux aarch64 runners
theComputeKid Sep 27, 2024
2056fe0
github: workflows: Enable aarch64 CI (#2142)
theComputeKid Oct 3, 2024
60c0002
github: workflows: Enable ACL caching for aarch64 ci
theComputeKid Oct 9, 2024
a98722c
github: workflows: disable testcase for graph mqa on macos aarch64 CI
taoye9 Oct 22, 2024
5488e8c
cmake: ignore GCC specific flags that produce false-positives
dzarukin Nov 19, 2024
3dd6aa6
common: utils: array_copy: remove specific diagnostics from the function
dzarukin Nov 19, 2024
7f23c16
github: workflows: bump actions/checkout from 4.1.7 to 4.2.2
dependabot[bot] Nov 25, 2024
937c658
fixup: common: utils: array_copy: remove specific diagnostics from th…
dzarukin Nov 25, 2024
be16835
gtests: graph: workaround gcc compilation issue
dzarukin Nov 25, 2024
ccb3ef1
github: workflows: Reduce MacOS CI configs for faster testing
theComputeKid Nov 22, 2024
e192596
fix, build, docs: aarch64: mutex lock if the ACL kernel is not stateless
Sqvid Nov 28, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 17 additions & 11 deletions .github/automation/build_acl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ source ${SCRIPT_DIR}/common_aarch64.sh

ACL_CONFIG=${ACL_CONFIG:-"Release"}
ACL_ROOT_DIR=${ACL_ROOT_DIR:-"${PWD}/ComputeLibrary"}
ACL_VERSION=${ACL_VERSION:-v24.09}
ACL_VERSION=${ACL_VERSION:-v24.11.1}
ACL_ARCH=${ACL_ARCH:-"armv8.2-a"}
ACL_REPO="https://github.com/ARM-software/ComputeLibrary.git"

Expand Down Expand Up @@ -61,13 +61,19 @@ fi
echo "Compiler version:"
$CC --version

set -x
git clone --branch $ACL_VERSION --depth 1 $ACL_REPO $ACL_ROOT_DIR

cd $ACL_ROOT_DIR

scons $MP Werror=0 debug=$ACL_DEBUG neon=1 opencl=0 embed_kernels=0 \
os=$ACL_OS arch=$ACL_ARCH build=native multi_isa=$ACL_MULTI_ISA_SUPPORT \
fixed_format_kernels=1 cppthreads=0 openmp=$ACL_OPENMP examples=0 \
validation_tests=0
set +x
if [[ "$ACL_ACTION" == "clone" ]]; then
set -x
git clone --branch $ACL_VERSION --depth 1 $ACL_REPO $ACL_ROOT_DIR
set +x
elif [[ "$ACL_ACTION" == "build" ]]; then
cd $ACL_ROOT_DIR
set -x
scons $MP Werror=0 debug=$ACL_DEBUG neon=1 opencl=0 embed_kernels=0 \
os=$ACL_OS arch=$ACL_ARCH build=native multi_isa=$ACL_MULTI_ISA_SUPPORT \
fixed_format_kernels=1 cppthreads=0 openmp=$ACL_OPENMP examples=0 \
validation_tests=0
set +x
else
echo "Unknown action: $ACL_ACTION"
exit 1
fi
24 changes: 21 additions & 3 deletions .github/automation/test_aarch64.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
# Defines MP, CC, CXX and OS.
source ${SCRIPT_DIR}/common_aarch64.sh

# Skip tests for certain config to preserve resources, while maintaining
# coverage. Skip:
# Skip tests for certain config to preserve resources, while maintaining
# coverage. Skip:
# (SEQ,CLANG)
# (OMP,CLANG,DEBUG)
SKIP_TESTS=0
Expand All @@ -36,7 +36,7 @@ if [[ "$OS" == "Linux" ]]; then
if [[ "$BUILD_TOOLSET" == "clang" ]]; then
SKIP_TESTS=1
fi
elif [[ "$ONEDNN_THREADING" == "OMP" ]]; then
elif [[ "$ONEDNN_THREADING" == "OMP" ]]; then
if [[ "$BUILD_TOOLSET" == "clang" ]]; then
if [[ "$CMAKE_BUILD_TYPE" == "Debug" ]]; then
SKIP_TESTS=1
Expand All @@ -62,20 +62,38 @@ if [[ "$OS" == "Linux" ]]; then
SKIPPED_TEST_FAILURES+="|test_benchdnn_modeC_conv_smoke_cpu"
SKIPPED_TEST_FAILURES+="|test_benchdnn_modeC_deconv_smoke_cpu"
SKIPPED_TEST_FAILURES+="|test_benchdnn_modeC_matmul_smoke_cpu"
SKIPPED_TEST_FAILURES+="|cpu-graph-gqa-cpp"
SKIPPED_TEST_FAILURES+="|cpu-graph-mqa-cpp"
SKIPPED_TEST_FAILURES+="|cpu-graph-sdpa-cpp"
SKIPPED_TEST_FAILURES+="|cpu-graph-sdpa-stacked-qkv-cpp"
SKIPPED_TEST_FAILURES+="|test_graph_unit_dnnl_large_partition_usm_cpu"
SKIPPED_TEST_FAILURES+="|test_graph_unit_dnnl_sdp_decomp_usm_cpu"
SKIPPED_TEST_FAILURES+="|test_graph_unit_dnnl_mqa_decomp_usm_cpu"
elif [[ "$CMAKE_BUILD_TYPE" == "Release" ]]; then
SKIPPED_TEST_FAILURES="cpu-primitives-deconvolution-cpp"
SKIPPED_TEST_FAILURES+="|test_benchdnn_modeC_lnorm_smoke_cpu"
SKIPPED_TEST_FAILURES+="|cpu-graph-gqa-cpp"
SKIPPED_TEST_FAILURES+="|cpu-graph-mqa-cpp"
SKIPPED_TEST_FAILURES+="|cpu-graph-sdpa-cpp"
SKIPPED_TEST_FAILURES+="|cpu-graph-sdpa-stacked-qkv-cpp"
SKIPPED_TEST_FAILURES+="|test_graph_unit_dnnl_large_partition_usm_cpu"
SKIPPED_TEST_FAILURES+="|test_graph_unit_dnnl_sdp_decomp_usm_cpu"
SKIPPED_TEST_FAILURES+="|test_graph_unit_dnnl_mqa_decomp_usm_cpu"
fi
elif [[ "$OS" == "Darwin" ]]; then
if [[ "$CMAKE_BUILD_TYPE" == "Debug" ]]; then
SKIPPED_TEST_FAILURES="cpu-primitives-deconvolution-cpp"
SKIPPED_TEST_FAILURES+="|test_benchdnn_modeC_lnorm_smoke_cpu"
SKIPPED_TEST_FAILURES+="|test_benchdnn_modeC_brgemm_smoke_cpu"
SKIPPED_TEST_FAILURES+="|test_benchdnn_modeC_brgemm_ci_cpu"
SKIPPED_TEST_FAILURES+="|test_graph_unit_dnnl_sdp_decomp_usm_cpu"
SKIPPED_TEST_FAILURES+="|test_graph_unit_dnnl_mqa_decomp_usm_cpu"
elif [[ "$CMAKE_BUILD_TYPE" == "Release" ]]; then
SKIPPED_TEST_FAILURES="cpu-primitives-deconvolution-cpp"
SKIPPED_TEST_FAILURES+="|test_benchdnn_modeC_lnorm_smoke_cpu"
SKIPPED_TEST_FAILURES+="|test_benchdnn_modeC_lnorm_ci_cpu"
SKIPPED_TEST_FAILURES+="|test_graph_unit_dnnl_sdp_decomp_usm_cpu"
SKIPPED_TEST_FAILURES+="|test_graph_unit_dnnl_mqa_decomp_usm_cpu"
fi
fi

Expand Down
104 changes: 87 additions & 17 deletions .github/workflows/ci-aarch64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,26 @@ on:
#* Stop stale workflows when pull requests are updated: https://stackoverflow.com/a/70972844
#* Does not apply to the main branch.
concurrency:
group: ${{ github.ref }}
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# Declare default permissions as read only.
permissions: read-all

jobs:
macos:
name: macOS
runs-on: macos-14
strategy:
matrix:
toolset: [clang, gcc]
config: [Debug, Release]
config: [
{ toolset: clang, build: Release },
{ toolset: gcc, build: Release }
]

name: MacOS, ${{ matrix.config.toolset }}, ${{ matrix.config.build }}
steps:
- name: Checkout oneDNN
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
path: oneDNN

Expand All @@ -53,31 +55,65 @@ jobs:
with:
packages: scons

- name: Clone ACL
run: ${{ github.workspace }}/oneDNN/.github/automation/build_acl.sh
env:
ACL_ACTION: clone
ACL_CONFIG: ${{ matrix.config.build }}
ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary
BUILD_TOOLSET: ${{ matrix.config.toolset }}
GCC_VERSION: 14

- name: Get ACL commit hash for cache key
id: get_acl_commit_hash
run: (cd ${{ github.workspace }}/ComputeLibrary && echo "ACLCommitHash=$(git rev-parse --short HEAD)") >> $GITHUB_OUTPUT

- name: Get system name
id: get_system_name
run: (echo "SystemName=$(uname)") >> $GITHUB_OUTPUT

- name: Restore cached ACL
id: cache-acl-restore
uses: actions/cache/restore@v4
with:
key: ${{ steps.get_system_name.outputs.SystemName }}-acl-${{ matrix.config.toolset }}-${{ matrix.config.build }}-${{ steps.get_acl_commit_hash.outputs.ACLCommitHash }}
path: ${{ github.workspace }}/ComputeLibrary/build

- name: Build ACL
if: ${{ steps.cache-acl-restore.outputs.cache-hit != 'true' }}
run: ${{ github.workspace }}/oneDNN/.github/automation/build_acl.sh
env:
ACL_ACTION: build
ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary
BUILD_TOOLSET: ${{ matrix.toolset }}
ACL_CONFIG: ${{ matrix.config }}
BUILD_TOOLSET: ${{ matrix.config.toolset }}
ACL_CONFIG: ${{ matrix.config.build }}
GCC_VERSION: 14

- name: Save ACL in cache
if: ${{ steps.cache-acl-restore.outputs.cache-hit != 'true' }}
id: cache-acl_build-save
uses: actions/cache/save@v4
with:
key: ${{ steps.cache-acl-restore.outputs.cache-primary-key }}
path: ${{ github.workspace }}/ComputeLibrary/build

- name: Build oneDNN
run: ${{ github.workspace }}/oneDNN/.github/automation/build_aarch64.sh
working-directory: ${{ github.workspace }}/oneDNN
env:
ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary
BUILD_TOOLSET: ${{ matrix.toolset }}
CMAKE_BUILD_TYPE: ${{ matrix.config }}
BUILD_TOOLSET: ${{ matrix.config.toolset }}
CMAKE_BUILD_TYPE: ${{ matrix.config.build }}
GCC_VERSION: 14

- if: matrix.toolset == 'clang'
- if: matrix.config.toolset == 'clang'
name: Run oneDNN smoke tests
run: ${{ github.workspace }}/oneDNN/.github/automation/test_aarch64.sh
working-directory: ${{ github.workspace }}/oneDNN/build
env:
CMAKE_BUILD_TYPE: ${{ matrix.config }}
CMAKE_BUILD_TYPE: ${{ matrix.config.build }}
DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build

# We only run the linux aarch64 runners if macos smoke tests pass.
linux:
needs: macos
Expand All @@ -87,15 +123,15 @@ jobs:
toolset: [clang, gcc]
config: [Debug, Release]
host: [
{ name: c6g, label: ah-ubuntu_22_04-c6g_2x-50 },
{ name: c6g, label: ah-ubuntu_22_04-c6g_2x-50 },
{ name: c7g, label: ah-ubuntu_22_04-c7g_2x-50 }
]

name: ${{ matrix.host.name }}, ${{ matrix.toolset }}, ${{ matrix.threading }}, ${{ matrix.config }}
runs-on: ${{ matrix.host.label }}
steps:
- name: Checkout oneDNN
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
path: oneDNN

Expand All @@ -122,15 +158,49 @@ jobs:
with:
version: "17"

- name: Build ACL
- name: Clone ACL
run: ${{ github.workspace }}/oneDNN/.github/automation/build_acl.sh
env:
ACL_ACTION: clone
ACL_CONFIG: ${{ matrix.config }}
ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary
BUILD_TOOLSET: ${{ matrix.toolset }}
GCC_VERSION: 13

- name: Get ACL commit hash for cache key
id: get_acl_commit_hash
run: (cd ${{ github.workspace }}/ComputeLibrary && echo "ACLCommitHash=$(git rev-parse --short HEAD)") >> $GITHUB_OUTPUT

- name: Get system name
id: get_system_name
run: (echo "SystemName=$(uname)") >> $GITHUB_OUTPUT

- name: Restore cached ACL
id: cache-acl-restore
uses: actions/cache/restore@v4
with:
key: ${{ steps.get_system_name.outputs.SystemName }}-acl-${{ matrix.toolset }}-${{ matrix.config }}-${{ steps.get_acl_commit_hash.outputs.ACLCommitHash }}
path: ${{ github.workspace }}/ComputeLibrary/build

- name: Build ACL
if: ${{ steps.cache-acl-restore.outputs.cache-hit != 'true' }}
run: ${{ github.workspace }}/oneDNN/.github/automation/build_acl.sh
env:
ACL_ACTION: build
ACL_ROOT_DIR: ${{ github.workspace }}/ComputeLibrary
BUILD_TOOLSET: ${{ matrix.toolset }}
ACL_CONFIG: ${{ matrix.config }}
GCC_VERSION: 13
ACL_THREADING: ${{ matrix.threading }}

- name: Save ACL in cache
if: ${{ steps.cache-acl-restore.outputs.cache-hit != 'true' }}
id: cache-acl_build-save
uses: actions/cache/save@v4
with:
key: ${{ steps.cache-acl-restore.outputs.cache-primary-key }}
path: ${{ github.workspace }}/ComputeLibrary/build

- name: Build oneDNN
run: ${{ github.workspace }}/oneDNN/.github/automation/build_aarch64.sh
working-directory: ${{ github.workspace }}/oneDNN
Expand All @@ -148,4 +218,4 @@ jobs:
BUILD_TOOLSET: ${{ matrix.toolset }}
CMAKE_BUILD_TYPE: ${{ matrix.config }}
DYLD_LIBRARY_PATH: ${{ github.workspace }}/ComputeLibrary/build
ONEDNN_THREADING: ${{ matrix.threading }}
ONEDNN_THREADING: ${{ matrix.threading }}
2 changes: 1 addition & 1 deletion .github/workflows/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
pull-requests: write

steps:
- uses: actions/labeler@v5.0.0
- uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0
with:
sync-labels: true
configuration-path: '.github/labels.yml'
2 changes: 1 addition & 1 deletion .github/workflows/openssf-scorecard.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:

steps:
- name: "Checkout code"
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
persist-credentials: false

Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# limitations under the License.
#===============================================================================

build
build*
external
.vs
.vscode
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ On a CPU based on Arm AArch64 architecture, oneDNN CPU engine can be built with
machine learning applications and provides AArch64 optimized implementations
of core functions. This functionality currently requires that ACL is downloaded
and built separately. See [Build from Source] section of the Developer Guide for
details. oneDNN only supports Compute Library versions 24.08.1 or later.
details. oneDNN only supports Compute Library versions 24.11.1 or later.

[Arm Compute Library (ACL)]: https://github.com/arm-software/ComputeLibrary

Expand Down
2 changes: 1 addition & 1 deletion cmake/ACL.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ endif()

find_package(ACL REQUIRED)

set(ACL_MINIMUM_VERSION "24.08.1")
set(ACL_MINIMUM_VERSION "24.11.1")

if(ACL_FOUND)
file(GLOB_RECURSE ACL_VERSION_FILE ${ACL_INCLUDE_DIR}/*/arm_compute_version.embed)
Expand Down
5 changes: 3 additions & 2 deletions cmake/platform.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,12 @@ macro(platform_gnu_nowarn_ccxx_flags var gnu_version)
append(${var} "-Wno-strict-overflow")
# suppress false positive warnings about uninitialized variables
append(${var} "-Wno-maybe-uninitialized")
# suppress false positive warnings with 10.x: GCC Bugzilla – Bug 96963
# suppress false positive warnings with 9.x+: GCC Bugzilla – Bug 96963
# assume 0.0 is unknown version - always suppress the warning
if(${gnu_version} VERSION_EQUAL 0.0 OR
(${gnu_version} VERSION_GREATER 10.0 AND ${gnu_version} VERSION_LESS 11.0))
${gnu_version} VERSION_GREATER 9.0)
append(${var} "-Wno-stringop-overflow")
append(${var} "-Wno-array-bounds")
endif()
endmacro()

Expand Down
8 changes: 0 additions & 8 deletions src/common/compiler_workarounds.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,6 @@
#define GCC_WA_NO_TREE_DOMINATOR_OPTS 0
#endif

// Workaround 04: GCC
//
// GCC 10, 11 & 12 (at least versions 10.1, 10.3, 11.1 & 12.2) report false positives
// in xbyak when -Warray-bounds build setting is on
#if (defined NEED_GCC_WA_CHECK) && (__GNUC__ >= 10)
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif

// Workaround 05: GCC
//
// NOTE: inside lambda, type cast variables captured by reference using
Expand Down
19 changes: 19 additions & 0 deletions src/common/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,11 +209,30 @@ constexpr bool any_null(Args... ptrs) {
return one_of(nullptr, ptrs...);
}

// For some unknown reason, GCC 11.x and beyond can't compile specific places
// of the library that involve this routine. It's connected to the fact that
// this function is inline and defined in a header.
#if defined(__GNUC__) && __GNUC__ > 8 && !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wrestrict"
// /usr/include/bits/string_fortified.h:29:33: warning: ‘void* __builtin_memcpy(
// void*, const void*, long unsigned int)’ accessing 18446744056529682432 or
// more bytes at offsets 320 and 0 overlaps 9223372002495037441 bytes at
// offset -9223372019674906625 [-Wrestrict]
#pragma GCC diagnostic ignored "-Wstringop-overflow"
// warning: ‘void* __builtin_memcpy(void*, const void*, long unsigned int)’
// specified bound between 18446744056529682432 and 18446744073709551608
// exceeds maximum object size 9223372036854775807 [-Wstringop-overflow=]
#endif
// Copies `size` elements from `src` into `dst`, one element at a time using
// T's assignment, walking indices in increasing order starting at 0.
// A `size` of zero performs no assignments.
template <typename T>
inline void array_copy(T *dst, const T *src, size_t size) {
    size_t idx = 0;
    while (idx != size) {
        dst[idx] = src[idx];
        ++idx;
    }
}
#if defined(__GNUC__) && __GNUC__ > 8 && !defined(__clang__)
#pragma GCC diagnostic pop
#endif

template <typename T>
inline bool array_cmp(const T *a1, const T *a2, size_t size) {
for (size_t i = 0; i < size; ++i)
Expand Down
Loading
Loading