Skip to content

Commit

Permalink
[ARM] Windows ARM64 support
Browse files Browse the repository at this point in the history
  • Loading branch information
luweizhou2016 authored and xczhai committed Dec 4, 2024
1 parent b39ffaa commit fc08d8c
Show file tree
Hide file tree
Showing 10 changed files with 51 additions and 55 deletions.
14 changes: 8 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,14 @@ if(POLICY CMP0082)
cmake_policy(SET CMP0082 NEW)
endif()

if("${CMAKE_BUILD_TYPE}" STREQUAL "")
message(STATUS "CMAKE_BUILD_TYPE is unset, defaulting to Release")
set(CMAKE_BUILD_TYPE "Release" CACHE STRING
"Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel RelWithAssert RelWithMDd...")
# Ask CMake whether the active generator is multi-config; the answer is stored
# in ONEDNN_GENERATOR_MULTI_CONFIG.
get_property(ONEDNN_GENERATOR_MULTI_CONFIG
    GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)

# Only single-config generators get a default build type and the upper-cased
# UPPERCASE_CMAKE_BUILD_TYPE helper variable; for multi-config generators
# neither is touched here.
if(NOT ONEDNN_GENERATOR_MULTI_CONFIG)
    # Fall back to Release when no build type was requested.
    if(NOT CMAKE_BUILD_TYPE)
        message(STATUS "CMAKE_BUILD_TYPE is unset, defaulting to 'Release'")
        set(CMAKE_BUILD_TYPE "Release" CACHE STRING
            "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel RelWithAssert RelWithMDd...")
    endif()

    # Upper-case copy of the build type, stored in UPPERCASE_CMAKE_BUILD_TYPE.
    string(TOUPPER "${CMAKE_BUILD_TYPE}" UPPERCASE_CMAKE_BUILD_TYPE)
endif()

set(PROJECT_NAME "oneDNN")
Expand Down Expand Up @@ -109,8 +113,6 @@ set(CMAKE_SRC_CCXX_FLAGS) # SRC specifics
set(CMAKE_EXAMPLE_CCXX_FLAGS) # EXAMPLE specifics
set(CMAKE_TEST_CCXX_FLAGS) # TESTS specifics

string(TOUPPER "${CMAKE_BUILD_TYPE}" UPPERCASE_CMAKE_BUILD_TYPE)

set(LIB_PACKAGE_NAME "dnnl")

include("cmake/dnnl_compat.cmake")
Expand Down
55 changes: 25 additions & 30 deletions cmake/platform.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ set(CMAKE_CCXX_FLAGS)
set(CMAKE_CCXX_NOWARN_FLAGS)
set(CMAKE_CCXX_NOEXCEPT_FLAGS)
set(DEF_ARCH_OPT_FLAGS)
set(DEF_ARCH_OPT_FLAGS_NON_DEBUG)

# Compatibility with DNNL
if($ENV{ONEDNN_WERROR})
Expand Down Expand Up @@ -119,7 +120,7 @@ endif()
if(MSVC)
set(USERCONFIG_PLATFORM "x64")
append_if(DNNL_WERROR CMAKE_CCXX_FLAGS "/WX")
if(${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
append(CMAKE_CCXX_FLAGS "/MP")
# increase number of sections in obj file
append(CMAKE_CCXX_FLAGS "/bigobj")
Expand Down Expand Up @@ -235,9 +236,7 @@ elseif(UNIX OR MINGW)
# compiler specific settings
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
if(DNNL_TARGET_ARCH MATCHES "^(AARCH64|ARM)$")
if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
set(DEF_ARCH_OPT_FLAGS "-O3")
endif()
set(DEF_ARCH_OPT_FLAGS_NON_DEBUG "-O3")
if (CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR)
# Defaults to a generic cpu target, equivalent to setting -mtune=generic -march=armv8-a.
# This ensures no implementation specific tuning, or architectural features beyond
Expand All @@ -247,21 +246,17 @@ elseif(UNIX OR MINGW)
append(DEF_ARCH_OPT_FLAGS "-mcpu=generic")
endif()
elseif(DNNL_TARGET_ARCH STREQUAL "PPC64")
if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
set(DEF_ARCH_OPT_FLAGS "-O3")
endif()
# For native compilation tune for the host processor
if (CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR)
append(DEF_ARCH_OPT_FLAGS "-mcpu=native")
endif()
set(DEF_ARCH_OPT_FLAGS_NON_DEBUG "-O3")
# For native compilation tune for the host processor
if (CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR)
append(DEF_ARCH_OPT_FLAGS "-mcpu=native")
endif()
elseif(DNNL_TARGET_ARCH STREQUAL "S390X")
if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
set(DEF_ARCH_OPT_FLAGS "-O3")
endif()
# For native compilation tune for the host processor
if (CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR)
append(DEF_ARCH_OPT_FLAGS "-march=native")
endif()
set(DEF_ARCH_OPT_FLAGS_NON_DEBUG "-O3")
# For native compilation tune for the host processor
if (CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR)
append(DEF_ARCH_OPT_FLAGS "-march=native")
endif()
elseif(DNNL_TARGET_ARCH STREQUAL "X64")
platform_clang_x64_arch_ccxx_flags(DEF_ARCH_OPT_FLAGS)
endif()
Expand Down Expand Up @@ -326,7 +321,7 @@ elseif(UNIX OR MINGW)
endif()
endif()

elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
# XXX: Suppress a warning that pops up when using a function pointer
# to an OpenCL function as a template argument (GCC Bugzilla – Bug 71463).
if (DNNL_GPU_RUNTIME STREQUAL "OCL")
Expand All @@ -340,9 +335,8 @@ elseif(UNIX OR MINGW)
endif()

if(DNNL_TARGET_ARCH MATCHES "^(AARCH64|ARM)$")
if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
set(DEF_ARCH_OPT_FLAGS "-O3")
endif()
set(DEF_ARCH_OPT_FLAGS_NON_DEBUG "-O3")
# For native compilation tune for the host processor
if (CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR)
# Defaults to a generic cpu target, equivalent to setting -mtune=generic -march=armv8-a.
# This ensures no implementation specific tuning, or architectural features beyond
Expand All @@ -352,18 +346,14 @@ elseif(UNIX OR MINGW)
append(DEF_ARCH_OPT_FLAGS "-mcpu=generic")
endif()
elseif(DNNL_TARGET_ARCH STREQUAL "PPC64")
if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
set(DEF_ARCH_OPT_FLAGS "-O3")
endif()
set(DEF_ARCH_OPT_FLAGS_NON_DEBUG "-O3")
# In GCC, -ftree-vectorize is turned on under -O3 since 2007.
# For native compilation tune for the host processor
if (CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR)
append(DEF_ARCH_OPT_FLAGS "-mcpu=native")
endif()
elseif(DNNL_TARGET_ARCH STREQUAL "S390X")
if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
set(DEF_ARCH_OPT_FLAGS "-O3")
endif()
set(DEF_ARCH_OPT_FLAGS_NON_DEBUG "-O3")
# In GCC, -ftree-vectorize is turned on under -O3 since 2007.
# For native compilation tune for the host processor
if (CMAKE_SYSTEM_PROCESSOR STREQUAL CMAKE_HOST_SYSTEM_PROCESSOR)
Expand Down Expand Up @@ -418,8 +408,13 @@ if(DNNL_ARCH_OPT_FLAGS STREQUAL "HostOpts")
set(DNNL_ARCH_OPT_FLAGS "${DEF_ARCH_OPT_FLAGS}")
endif()

append(CMAKE_C_FLAGS "${CMAKE_CCXX_FLAGS} ${DNNL_ARCH_OPT_FLAGS}")
append(CMAKE_CXX_FLAGS "${CMAKE_CCXX_FLAGS} ${DNNL_ARCH_OPT_FLAGS}")
append(CMAKE_C_FLAGS "${CMAKE_CCXX_FLAGS}")
append(CMAKE_CXX_FLAGS "${CMAKE_CCXX_FLAGS}")

# Add the non-debug architecture optimization flags to the per-configuration
# C and C++ flag variables of the listed configurations only; the Debug
# variables are not modified by this loop.
# NOTE(review): the CMAKE_BUILD_TYPE help text also mentions RelWithAssert and
# RelWithMDd; they are not listed here -- confirm whether that is intended.
foreach(config RELEASE RELWITHDEBINFO MINSIZEREL)
    append(CMAKE_CXX_FLAGS_${config} "${DEF_ARCH_OPT_FLAGS_NON_DEBUG}")
    append(CMAKE_C_FLAGS_${config} "${DEF_ARCH_OPT_FLAGS_NON_DEBUG}")
endforeach()

if(APPLE)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
Expand Down
6 changes: 2 additions & 4 deletions src/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,8 @@ if(DNNL_TARGET_ARCH STREQUAL "PPC64")
file(GLOB FILES_REQUIRED_OPT
${CMAKE_CURRENT_SOURCE_DIR}/gemm/*.[ch]pp
)
if(NOT UPPERCASE_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
set_source_files_properties(${FILES_REQUIRED_OPT}
PROPERTIES COMPILE_FLAGS "-O3 -funroll-loops")
endif()
# Compile the files listed in FILES_REQUIRED_OPT with "-O3 -funroll-loops" in
# every configuration except Debug. The conditional generator expression
# $<condition:value> expands to the flags when the condition is 1 and to an
# empty string otherwise, so Debug builds receive no extra flags. It is quoted
# so the embedded space stays inside a single property value.
# NOTE(review): per the CMake documentation the Xcode generator does not
# support per-config per-source settings -- confirm this is irrelevant for the
# PPC64 toolchains this branch targets.
set_source_files_properties(${FILES_REQUIRED_OPT}
    PROPERTIES COMPILE_FLAGS "$<$<NOT:$<CONFIG:Debug>>:-O3 -funroll-loops>")
endif()

if(NOT DNNL_ENABLE_JIT_PROFILING)
Expand Down
2 changes: 2 additions & 0 deletions src/cpu/aarch64/acl_convolution_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
* limitations under the License.
*******************************************************************************/

#include <cstdint>

#include "acl_convolution_utils.hpp"
#include "common/convolution_pd.hpp"
#include "common/utils.hpp"
Expand Down
2 changes: 1 addition & 1 deletion src/cpu/aarch64/acl_gemm_convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ status_t acl_gemm_convolution_fwd_t<src_t, wei_t, dst_t, bia_t>::pd_t::init(
using smask_t = primitive_attr_t::skip_mask_t;

bool ok = is_fwd() && set_default_alg_kind(alg_kind::convolution_direct)
&& expect_data_types(src_t, wei_t, bia_t, dst_t, undef)
&& expect_data_types(src_t, wei_t, bia_t, dst_t, data_type::undef)
&& !has_zero_dim_memory()
&& attr()->has_default_values(
smask_t::post_ops | smask_t::fpmath_mode, dst_t);
Expand Down
4 changes: 2 additions & 2 deletions src/cpu/aarch64/acl_indirect_gemm_convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@ status_t acl_indirect_gemm_convolution_fwd_t::pd_t::init(engine_t *engine) {
using namespace data_type;
using smask_t = primitive_attr_t::skip_mask_t;

const bool is_fp16_ok = expect_data_types(f16, f16, f16, f16, undef)
const bool is_fp16_ok = expect_data_types(f16, f16, f16, f16, data_type::undef)
&& attr()->has_default_values(smask_t::post_ops, f16);
const bool is_fp32_ok = expect_data_types(f32, f32, f32, f32, undef)
const bool is_fp32_ok = expect_data_types(f32, f32, f32, f32, data_type::undef)
&& attr()->has_default_values(
smask_t::post_ops | smask_t::fpmath_mode, f32);
bool ok = is_fwd() && set_default_alg_kind(alg_kind::convolution_direct)
Expand Down
10 changes: 5 additions & 5 deletions src/cpu/aarch64/cpu_isa_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@
#include "common/utils.hpp"
#include "dnnl_types.h"

/* in order to make selinux happy memory that would be marked with X-bit should
* be obtained with mmap */
#if !defined(_WIN32)
#define XBYAK_USE_MMAP_ALLOCATOR
#endif
// /* in order to make selinux happy memory that would be marked with X-bit should
// * be obtained with mmap */
// #if !defined(_WIN32)
// #define XBYAK_USE_MMAP_ALLOCATOR
// #endif

#include "cpu/aarch64/xbyak_aarch64/xbyak_aarch64/xbyak_aarch64.h"
#include "cpu/aarch64/xbyak_aarch64/xbyak_aarch64/xbyak_aarch64_util.h"
Expand Down
3 changes: 2 additions & 1 deletion src/cpu/aarch64/jit_uni_deconv_zp_pad_str_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*******************************************************************************/

#include <cassert>
#include <cstdint>
#include "common/dnnl_thread.hpp"
#include "common/utils.hpp"
#include "cpu/aarch64/jit_primitive_conf.hpp"
Expand Down Expand Up @@ -128,7 +129,7 @@ template <cpu_isa_t isa>
uint32_t jit_uni_deconv_zp_pad_str_kernel_t<isa>::get_next_vmm_idx() {
static constexpr int max_v_regs = cpu_isa_traits<isa>::n_vregs;

const ZReg vmm {static_cast<unsigned int>(current_vmm_++)};
const ZReg vmm {static_cast<uint32_t>(current_vmm_++)};

if (current_vmm_ == max_v_regs) current_vmm_ = number_reserved_vmms_;

Expand Down
8 changes: 2 additions & 6 deletions src/cpu/x64/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,8 @@ if(MSVC)
set_source_files_properties(${FILES_WITHOUT_OPT}
PROPERTIES COMPILE_FLAGS "/Od")
else()
if(UPPERCASE_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
# Some compilers enable optimizations by default.
set(OPT_LEVEL "-O0")
else()
set(OPT_LEVEL "-O1")
endif()
# Some compilers enable optimizations by default, so the level is always set
# explicitly for FILES_WITHOUT_OPT: -O0 in the Debug configuration and -O1 in
# every other one. The choice is made by a generator expression, i.e. per
# configuration at generate time.
set(OPT_LEVEL "$<IF:$<CONFIG:Debug>,-O0,-O1>")
set_source_files_properties(${FILES_WITHOUT_OPT}
    PROPERTIES COMPILE_FLAGS "${OPT_LEVEL}")
endif()
Expand Down
2 changes: 2 additions & 0 deletions src/cpu/x64/xbyak/xbyak_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ class Cpu {
}
return;
}
#ifdef XBYAK_INTEL_CPU_SPECIFIC
// intel
const uint32_t NO_CACHE = 0;
const uint32_t DATA_CACHE = 1;
Expand Down Expand Up @@ -347,6 +348,7 @@ class Cpu {
dataCacheLevels_++;
}
}
#endif // XBYAK_INTEL_CPU_SPECIFIC
}

public:
Expand Down

0 comments on commit fc08d8c

Please sign in to comment.