Compute Library v24.11.1

ARM-software · Nov 30, 2024 · 1f3bf6b · 1f3bf6b
1 parent f44f09d
commit 1f3bf6b
Show file tree

Hide file tree

Showing 71 changed files with 24,018 additions and 25,763 deletions.
diff --git a/Android.bp b/Android.bp
@@ -426,6 +426,8 @@ cc_library_static {
         "src/cpu/kernels/CpuDirectConv2dKernel.cpp",
         "src/cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp",
         "src/cpu/kernels/CpuDirectConv3dKernel.cpp",
+        "src/cpu/kernels/CpuDynamicGemmKernel.cpp",
+        "src/cpu/kernels/CpuDynamicGemmKernelHeuristics.cpp",
         "src/cpu/kernels/CpuElementwiseKernel.cpp",
         "src/cpu/kernels/CpuElementwiseUnaryKernel.cpp",
         "src/cpu/kernels/CpuFillKernel.cpp",
@@ -609,6 +611,7 @@ cc_library_static {
         "src/cpu/operators/CpuDequantize.cpp",
         "src/cpu/operators/CpuDirectConv2d.cpp",
         "src/cpu/operators/CpuDirectConv3d.cpp",
+        "src/cpu/operators/CpuDynamicGemm.cpp",
         "src/cpu/operators/CpuElementwise.cpp",
         "src/cpu/operators/CpuElementwiseUnary.cpp",
         "src/cpu/operators/CpuFill.cpp",

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
 list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute)
 project(
   ArmCompute
-  VERSION 43.0.0
+  VERSION 44.0.0
   DESCRIPTION
     "The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures"
   LANGUAGES C CXX ASM)
@@ -333,3 +333,16 @@ if(ARM_COMPUTE_BUILD_EXAMPLES)
   endforeach()
 
 endif() # ARM_COMPUTE_BUILD_EXAMPLES
+
+# Install libraries
+install(TARGETS arm_compute arm_compute_graph
+        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+        ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+
+# Install test executables
+if(ARM_COMPUTE_BUILD_TESTING)
+  install(TARGETS arm_compute_validation_framework arm_compute_benchmark arm_compute_validation
+          RUNTIME DESTINATION "${CMAKE_INSTALL_LIBDIR}/tests"
+          LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/tests"
+          ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/tests")
+endif()
diff --git a/README.md b/README.md
@@ -9,7 +9,7 @@
  <img src="https://raw.githubusercontent.com/ARM-software/ComputeLibrary/gh-pages/ACL_logo.png"/><br><br>
 </div>
 
-# Compute Library ![](https://img.shields.io/badge/latest_release-24.11-green)
+# Compute Library ![](https://img.shields.io/badge/latest_release-24.11.1-green)
 
 
 The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPUs architectures.<br>
@@ -37,7 +37,7 @@ Key Features:
 <br>
 
 ## Documentation
-[![Documentation](https://img.shields.io/badge/documentation-24.11-green)](https://artificial-intelligence.sites.arm.com/computelibrary/v24.11/index.xhtml)
+[![Documentation](https://img.shields.io/badge/documentation-24.11.1-green)](https://artificial-intelligence.sites.arm.com/computelibrary/v24.11.1/index.xhtml)
 
 > Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc.
 
@@ -50,22 +50,22 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C
 
 | Platform       | Operating System | Release archive (Download) |
 | -------------- | ---------------- | -------------------------- |
-| Raspberry Pi 4 | Linux® 32bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-armv7a-cpu-bin.tar.gz) |
-| Raspberry Pi 4 | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-bin.tar.gz) |
-| Odroid N2      | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-gpu-bin.tar.gz) |
-| HiKey960       | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-gpu-bin.tar.gz) |
+| Raspberry Pi 4 | Linux® 32bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-armv7a-cpu-bin.tar.gz) |
+| Raspberry Pi 4 | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-aarch64-cpu-bin.tar.gz) |
+| Odroid N2      | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
+| HiKey960       | Linux® 64bit      | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
 
 <br>
 
 | Architecture | Operating System | Release archive (Download) |
 | ------------ | ---------------- | -------------------------- |
-| armv7        | Linux®            | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-armv7a-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-armv7a-cpu-gpu-bin.tar.gz) |
-| arm64-v8a    | Android™          | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-android-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-android-aarch64-cpu-gpu-bin.tar.gz) |
-| arm64-v8a    | Linux®            | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-gpu-bin.tar.gz) |
+| armv7        | Linux®            | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-armv7a-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-armv7a-cpu-gpu-bin.tar.gz) |
+| arm64-v8a    | Android™          | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-android-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-android-aarch64-cpu-gpu-bin.tar.gz) |
+| arm64-v8a    | Linux®            | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
 
 <br>
 
-Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.11-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.11)
+Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.11.1-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.11.1)
 
 Pre-build binaries are generated with the following security / good coding practices related flags:
 > -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong
@@ -107,13 +107,13 @@ Pre-build binaries are generated with the following security / good coding pract
 
 ## Experimental builds
 
-**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://artificial-intelligence.sites.arm.com/computelibrary/v24.11/how_to_build.xhtml) for more details.
+**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://artificial-intelligence.sites.arm.com/computelibrary/v24.11.1/how_to_build.xhtml) for more details.
 
 <br>
 
 ## How to contribute
 
-Contributions to the Compute Library are more than welcome. If you are interested on contributing, please have a look at our [how to contribute guidelines](https://artificial-intelligence.sites.arm.com/computelibrary/v24.11/contribution_guidelines.xhtml).
+Contributions to the Compute Library are more than welcome. If you are interested in contributing, please have a look at our [how to contribute guidelines](https://artificial-intelligence.sites.arm.com/computelibrary/v24.11.1/contribution_guidelines.xhtml).
 
 ### Developer Certificate of Origin (DCO)
 Before the Compute Library accepts your contribution, you need to certify its origin and give us your permission. To manage this process we use the Developer Certificate of Origin (DCO) V1.1 (https://developercertificate.org/)

diff --git a/SConscript b/SConscript
@@ -33,8 +33,8 @@ import codecs
 import platform
 import SCons
 
-VERSION = "v24.11"
-LIBRARY_VERSION_MAJOR = 43
+VERSION = "v24.11.1"
+LIBRARY_VERSION_MAJOR = 44
 LIBRARY_VERSION_MINOR = 0
 LIBRARY_VERSION_PATCH = 0
 SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH)

diff --git a/arm_compute/core/TensorShape.h b/arm_compute/core/TensorShape.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, 2023 Arm Limited.
+ * Copyright (c) 2016-2021, 2023-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TENSORSHAPE_H
-#define ARM_COMPUTE_TENSORSHAPE_H
+#ifndef ACL_ARM_COMPUTE_CORE_TENSORSHAPE_H
+#define ACL_ARM_COMPUTE_CORE_TENSORSHAPE_H
 
 #include "arm_compute/core/Dimensions.h"
 #include "arm_compute/core/Error.h"
@@ -35,7 +35,12 @@
 
 namespace arm_compute
 {
-/** Shape of a tensor */
+/** Shape of a tensor.
+ *
+ * It is allowed to set one or several dimensions of a tensor shape to size 0.
+ * In this case the dimensions of size 0 and the whole tensor shape are
+ * considered dynamic.
+ */
 class TensorShape : public Dimensions<size_t>
 {
 public:
@@ -77,26 +82,17 @@ class TensorShape : public Dimensions<size_t>
      */
     TensorShape &set(size_t dimension, size_t value, bool apply_dim_correction = true, bool increase_dim_unit = true)
     {
-        // Clear entire shape if one dimension is zero
-        if (value == 0)
-        {
-            _num_dimensions = 0;
-            std::fill(_id.begin(), _id.end(), 0);
-        }
-        else
-        {
-            // Make sure all empty dimensions are filled with 1
-            std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
+        // Make sure all empty dimensions are filled with 1
+        std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
 
-            // Set the specified dimension and increase the number of dimensions if
-            // necessary
-            Dimensions::set(dimension, value, increase_dim_unit);
+        // Set the specified dimension and increase the number of dimensions if
+        // necessary
+        Dimensions::set(dimension, value, increase_dim_unit);
 
-            // Correct number dimensions to ignore trailing dimensions of size 1
-            if (apply_dim_correction)
-            {
-                apply_dimension_correction();
-            }
+        // Correct the number of dimensions to ignore trailing dimensions of size 1
+        if (apply_dim_correction)
+        {
+            apply_dimension_correction();
         }
         return *this;
     }
@@ -244,6 +240,33 @@ class TensorShape : public Dimensions<size_t>
         return bc_shape;
     }
 
+    /** Check if the tensor shape is dynamic.
+     *
+     * If any dimension of the tensor shape has size 0, then this dimension
+     * and the whole shape are considered dynamic.
+     *
+     * @return True if the tensor shape is dynamic, else false.
+     */
+    bool is_dynamic() const
+    {
+        return std::any_of(cbegin(), cend(), [](const auto &s) { return s == 0; });
+    }
+
+    /** Check if a given dimension of the tensor shape is dynamic.
+     *
+     * If a dimension of the tensor shape has size 0, then this dimension
+     * and the whole shape are considered dynamic.
+     *
+     * @param[in] dim Dimension index.
+     *
+     * @return True if dimension dim is dynamic, else false.
+     */
+    bool is_dynamic(const size_t dim) const
+    {
+        ARM_COMPUTE_ERROR_ON(dim >= TensorShape::num_max_dimensions);
+        return _id[dim] == 0;
+    }
+
 private:
     /** Remove trailing dimensions of size 1 from the reported number of dimensions. */
     void apply_dimension_correction()
@@ -262,4 +285,4 @@ class TensorShape : public Dimensions<size_t>
     }
 };
 } // namespace arm_compute
-#endif /*ARM_COMPUTE_TENSORSHAPE_H*/
+#endif // ACL_ARM_COMPUTE_CORE_TENSORSHAPE_H
diff --git a/arm_compute/runtime/experimental/low_level/CpuGemmAssemblyDispatch.h b/arm_compute/runtime/experimental/low_level/CpuGemmAssemblyDispatch.h
@@ -149,6 +149,11 @@ class CpuGemmAssemblyDispatch : arm_compute::experimental::IOperator
                                const ITensorInfo         *d,
                                const GEMMInfo            &gemm_info = GEMMInfo());
 
+    /** Indicates whether or not there is a stateless implementation for the configured GEMM
+     * @return a bool: true if the implementation is stateless; false if not.
+     */
+    bool has_stateless_impl() const;
+
     /** Checks if activation is supported by the gemm assembly dispatcher
      *
      * @param[in] activation Activation to check

diff --git a/docs/Doxyfile b/docs/Doxyfile
@@ -60,7 +60,7 @@ PROJECT_NAME           = "Compute Library"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 24.11
+PROJECT_NUMBER         = 24.11.1
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a

diff --git a/filelist.json b/filelist.json
@@ -1581,6 +1581,8 @@
           "files": {
           "common": [
             "src/cpu/kernels/CpuConvertQuantizedSignednessKernel.cpp",
+            "src/cpu/kernels/CpuDynamicGemmKernel.cpp",
+            "src/cpu/kernels/CpuDynamicGemmKernelHeuristics.cpp",
             "src/cpu/kernels/CpuGemmMatrixAdditionKernel.cpp",
             "src/cpu/kernels/CpuGemmMatrixMultiplyKernel.cpp",
             "src/cpu/kernels/CpuGemmTranspose1xWKernel.cpp",
@@ -1593,6 +1595,7 @@
             "src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.cpp",
             "src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.cpp",
             "src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.cpp",
+            "src/cpu/operators/CpuDynamicGemm.cpp",
             "src/cpu/operators/CpuGemm.cpp",
             "src/cpu/operators/CpuGemmLowpOutputStage.cpp",
             "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp",

diff --git a/src/BUILD.bazel b/src/BUILD.bazel
@@ -709,6 +709,8 @@ filegroup(
 	"cpu/kernels/CpuDirectConv2dKernel.cpp",
 	"cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp",
 	"cpu/kernels/CpuDirectConv3dKernel.cpp",
+	"cpu/kernels/CpuDynamicGemmKernel.cpp",
+	"cpu/kernels/CpuDynamicGemmKernelHeuristics.cpp",
 	"cpu/kernels/CpuElementwiseKernel.cpp",
 	"cpu/kernels/CpuElementwiseUnaryKernel.cpp",
 	"cpu/kernels/CpuFillKernel.cpp",
@@ -892,6 +894,7 @@ filegroup(
 	"cpu/operators/CpuDequantize.cpp",
 	"cpu/operators/CpuDirectConv2d.cpp",
 	"cpu/operators/CpuDirectConv3d.cpp",
+	"cpu/operators/CpuDynamicGemm.cpp",
 	"cpu/operators/CpuElementwise.cpp",
 	"cpu/operators/CpuElementwiseUnary.cpp",
 	"cpu/operators/CpuFill.cpp",

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -700,6 +700,8 @@ target_sources(
 	cpu/kernels/CpuDirectConv2dKernel.cpp
 	cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp
 	cpu/kernels/CpuDirectConv3dKernel.cpp
+	cpu/kernels/CpuDynamicGemmKernel.cpp
+	cpu/kernels/CpuDynamicGemmKernelHeuristics.cpp
 	cpu/kernels/CpuElementwiseKernel.cpp
 	cpu/kernels/CpuElementwiseUnaryKernel.cpp
 	cpu/kernels/CpuFillKernel.cpp
@@ -883,6 +885,7 @@ target_sources(
 	cpu/operators/CpuDequantize.cpp
 	cpu/operators/CpuDirectConv2d.cpp
 	cpu/operators/CpuDirectConv3d.cpp
+	cpu/operators/CpuDynamicGemm.cpp
 	cpu/operators/CpuElementwise.cpp
 	cpu/operators/CpuElementwiseUnary.cpp
 	cpu/operators/CpuFill.cpp

diff --git a/src/common/cpuinfo/CpuInfo.cpp b/src/common/cpuinfo/CpuInfo.cpp
@@ -416,7 +416,7 @@ CpuInfo CpuInfo::build()
     return info;
 #elif defined(__aarch64__) && defined(_WIN64)    /* #elif defined(__aarch64__) && defined(__APPLE__) */
     CpuIsaInfo isainfo;
-    isainfo.neon = true;
+    isainfo.neon = IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE);
     isainfo.dot  = IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE);
     if (NTDDI_VERSION >= NTDDI_WIN11_GE)
     {