Skip to content

Commit

Permalink
gpu: amd: Enable SYCL kernels
Browse files Browse the repository at this point in the history
  • Loading branch information
sgeor255 committed Aug 7, 2024
1 parent 2b97e32 commit fe16e2e
Show file tree
Hide file tree
Showing 19 changed files with 75 additions and 53 deletions.
23 changes: 12 additions & 11 deletions cmake/options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -159,13 +159,13 @@ set(ONEDNN_ENABLE_GEMM_KERNELS_ISA "ALL" CACHE STRING
SSE41 < AVX2 < AVX512 < AMX (or ALL). It means that if user selects, e.g.
AVX2 ISA, SSE41 kernels will also present at build time.")

set(DNNL_HIP_TARGET_ARCH "" CACHE STRING
"Specifies the target offload architecture. Required when compiling
sycl kernels for AMD target backend.")
if("${DNNL_HIP_TARGET_ARCH}" STREQUAL "")
add_definitions(-DDNNL_HIP_DISABLE_SYCL_KERNELS=1)
set(DNNL_HIP_DISABLE_SYCL_KERNELS TRUE)
endif()
set(DNNL_AMD_SYCL_KERNELS_TARGET_ARCH "" CACHE STRING
"Specifies the target architecture (e.g. gfx90a when compiling on AMD MI210)
to be used for compiling generic SYCL kernels for AMD vendor.
When this option is set to a valid architecture (see LLVM target column in
https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html#supported-gpus
for supported architectures), the generic SYCL kernels will be enabled for AMD
vendor. If not set, the SYCL kernels will not be compiled.")

# =============
# Optimizations
Expand Down Expand Up @@ -317,12 +317,8 @@ endif()

if(DNNL_GPU_RUNTIME STREQUAL "DPCPP" OR DNNL_GPU_RUNTIME STREQUAL "SYCL")
set(DNNL_GPU_SYCL true)
set(DNNL_SYCL_INTEL OFF)
set(DNNL_SYCL_CUDA OFF)
set(DNNL_SYCL_HIP OFF)
if(DNNL_GPU_VENDOR STREQUAL "INTEL")
set(DNNL_SYCL_INTEL ON)
endif()
if(DNNL_GPU_VENDOR STREQUAL "NVIDIA")
set(DNNL_SYCL_CUDA ON)
endif()
Expand All @@ -339,6 +335,11 @@ else()
set(DNNL_WITH_SYCL false)
endif()

if(DNNL_SYCL_HIP AND NOT "${DNNL_AMD_SYCL_KERNELS_TARGET_ARCH}" STREQUAL "")
add_definitions(-DDNNL_AMD_ENABLE_SYCL_KERNELS=1)
set(DNNL_AMD_ENABLE_SYCL_KERNELS TRUE)
endif()

# =============
# Miscellaneous
# =============
Expand Down
5 changes: 2 additions & 3 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@ if(DNNL_SYCL_CUDA)
append(CMAKE_CXX_FLAGS "-Wno-linker-warnings")
endif()

if(DNNL_SYCL_HIP AND NOT DNNL_HIP_DISABLE_SYCL_KERNELS)
append(CMAKE_CXX_FLAGS "-fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${DNNL_HIP_TARGET_ARCH}")
append(CMAKE_CXX_FLAGS "-Wno-linker-warnings")
if (DNNL_AMD_ENABLE_SYCL_KERNELS)
append(CMAKE_CXX_FLAGS "-fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${DNNL_AMD_SYCL_KERNELS_TARGET_ARCH}")
endif()

# propagate sanitizer flags
Expand Down
6 changes: 3 additions & 3 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,9 @@ if(DNNL_WITH_SYCL)
append(CMAKE_SHARED_LINKER_FLAGS "-fsycl-targets=nvptx64-nvidia-cuda")
append(CMAKE_STATIC_LINKER_FLAGS "-fsycl-targets=nvptx64-nvidia-cuda")
endif()
if(DNNL_SYCL_HIP AND NOT DNNL_HIP_DISABLE_SYCL_KERNELS)
append(CMAKE_SHARED_LINKER_FLAGS "-fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${DNNL_HIP_TARGET_ARCH}")
append(CMAKE_STATIC_LINKER_FLAGS "-fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${DNNL_HIP_TARGET_ARCH}")
if(DNNL_AMD_ENABLE_SYCL_KERNELS)
append(CMAKE_SHARED_LINKER_FLAGS "-fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${DNNL_AMD_SYCL_KERNELS_TARGET_ARCH}")
append(CMAKE_STATIC_LINKER_FLAGS "-fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${DNNL_AMD_SYCL_KERNELS_TARGET_ARCH}")
endif()
endif()

Expand Down
10 changes: 3 additions & 7 deletions src/gpu/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,13 @@ file(GLOB SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
)

if(DNNL_SYCL_INTEL)
add_subdirectory(sycl)
endif()

if(DNNL_SYCL_CUDA)
add_subdirectory(sycl)
endif()

# Only enable the generic SYCL kernels on AMD backend
# if architecture has been specified
if(DNNL_SYCL_HIP AND NOT DNNL_HIP_DISABLE_SYCL_KERNELS)
# Only enable the generic SYCL kernels for AMD vendor
# if target architecture has been specified
if(DNNL_AMD_ENABLE_SYCL_KERNELS)
add_subdirectory(sycl)
endif()

Expand Down
4 changes: 2 additions & 2 deletions src/gpu/generic/sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ if(DNNL_SYCL_CUDA)
append(CMAKE_CXX_FLAGS "-Wno-linker-warnings")
endif()

if(DNNL_SYCL_HIP AND NOT DNNL_HIP_DISABLE_SYCL_KERNELS)
append(CMAKE_CXX_FLAGS "-fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${DNNL_HIP_TARGET_ARCH}")
if(DNNL_AMD_ENABLE_SYCL_KERNELS)
append(CMAKE_CXX_FLAGS "-fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${DNNL_AMD_SYCL_KERNELS_TARGET_ARCH}")
endif()

set(OBJ_LIB ${LIB_PACKAGE_NAME}_gpu_generic_sycl)
Expand Down
6 changes: 4 additions & 2 deletions src/gpu/gpu_batch_normalization_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

#include "gpu/gpu_impl_list.hpp"

#include "gpu/generic/sycl/ref_batch_normalization.hpp"

#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
#include "gpu/intel/ocl/bnorm/gen9_batch_normalization.hpp"
#include "gpu/intel/ocl/bnorm/nhwc_batch_normalization.hpp"
Expand All @@ -39,6 +37,10 @@
#include "gpu/amd/miopen_batch_normalization.hpp"
#endif

#if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA || DNNL_GPU_VENDOR == DNNL_VENDOR_AMD
#include "gpu/generic/sycl/ref_batch_normalization.hpp"
#endif

namespace dnnl {
namespace impl {
namespace gpu {
Expand Down
6 changes: 4 additions & 2 deletions src/gpu/gpu_binary_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

#include "gpu/gpu_impl_list.hpp"

#include "gpu/generic/sycl/ref_binary.hpp"

#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
#include "gpu/intel/ocl/gen9_binary.hpp"
#include "gpu/intel/ocl/multi_po_reorder_binary.hpp"
Expand All @@ -32,6 +30,10 @@
#include "gpu/amd/miopen_binary.hpp"
#endif

#if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA || DNNL_GPU_VENDOR == DNNL_VENDOR_AMD
#include "gpu/generic/sycl/ref_binary.hpp"
#endif

namespace dnnl {
namespace impl {
namespace gpu {
Expand Down
6 changes: 4 additions & 2 deletions src/gpu/gpu_convolution_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

#include "gpu/gpu_impl_list.hpp"

#include "gpu/generic/sycl/ref_convolution.hpp"

#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
#include "gpu/intel/jit/binary_format.hpp"
#include "gpu/intel/jit/conv/gen_convolution.hpp"
Expand All @@ -38,6 +36,10 @@
#include "gpu/amd/miopen_convolution.hpp"
#endif

#if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA || DNNL_GPU_VENDOR == DNNL_VENDOR_AMD
#include "gpu/generic/sycl/ref_convolution.hpp"
#endif

namespace dnnl {
namespace impl {
namespace gpu {
Expand Down
6 changes: 4 additions & 2 deletions src/gpu/gpu_eltwise_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

#include "gpu/gpu_impl_list.hpp"

#include "gpu/generic/sycl/ref_eltwise.hpp"

#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
#include "gpu/intel/ocl/gen9_eltwise.hpp"
#include "gpu/intel/ocl/ref_eltwise.hpp"
Expand All @@ -31,6 +29,10 @@
#include "gpu/amd/miopen_eltwise.hpp"
#endif

#if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA || DNNL_GPU_VENDOR == DNNL_VENDOR_AMD
#include "gpu/generic/sycl/ref_eltwise.hpp"
#endif

namespace dnnl {
namespace impl {
namespace gpu {
Expand Down
4 changes: 1 addition & 3 deletions src/gpu/gpu_impl_list.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,8 @@ namespace gpu {
// NOTE: Support for the standalone GENERIC vendor has not been added yet.
#if defined(DNNL_WITH_SYCL) \
&& ((DNNL_GPU_VENDOR == DNNL_VENDOR_GENERIC) \
|| (DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL) \
|| (DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA) \
|| (DNNL_GPU_VENDOR == DNNL_VENDOR_AMD \
&& DNNL_HIP_DISABLE_SYCL_KERNELS != 1))
|| (DNNL_AMD_ENABLE_SYCL_KERNELS == 1))
#define DNNL_GPU_GENERIC_SYCL_ONLY(...) __VA_ARGS__
#else
#define DNNL_GPU_GENERIC_SYCL_ONLY(...)
Expand Down
6 changes: 4 additions & 2 deletions src/gpu/gpu_layer_normalization_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

#include "gpu/gpu_impl_list.hpp"

#include "gpu/generic/sycl/ref_layer_normalizations.hpp"

#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
#include "gpu/intel/ocl/ref_layer_normalization.hpp"
#include "gpu/intel/ocl/reusable_lnorm.hpp"
Expand All @@ -26,6 +24,10 @@
#include "gpu/intel/ocl/vectorized_lnorm.hpp"
#endif

#if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA || DNNL_GPU_VENDOR == DNNL_VENDOR_AMD
#include "gpu/generic/sycl/ref_layer_normalizations.hpp"
#endif

namespace dnnl {
namespace impl {
namespace gpu {
Expand Down
6 changes: 4 additions & 2 deletions src/gpu/gpu_lrn_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

#include "gpu/gpu_impl_list.hpp"

#include "gpu/generic/sycl/ref_lrn.hpp"

#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
#include "gpu/intel/ocl/ref_lrn.hpp"
#endif
Expand All @@ -30,6 +28,10 @@
#include "gpu/amd/miopen_lrn.hpp"
#endif

#if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA || DNNL_GPU_VENDOR == DNNL_VENDOR_AMD
#include "gpu/generic/sycl/ref_lrn.hpp"
#endif

namespace dnnl {
namespace impl {
namespace gpu {
Expand Down
6 changes: 4 additions & 2 deletions src/gpu/gpu_pooling_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@

#include "gpu/gpu_impl_list.hpp"

#include "gpu/generic/sycl/ref_pooling.hpp"

#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
#include "gpu/intel/jit/pooling/gen_pooling.hpp"
#include "gpu/intel/ocl/gen9_global_pooling.hpp"
Expand All @@ -35,6 +33,10 @@
#include "gpu/amd/miopen_pooling.hpp"
#endif

#if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA || DNNL_GPU_VENDOR == DNNL_VENDOR_AMD
#include "gpu/generic/sycl/ref_pooling.hpp"
#endif

namespace dnnl {
namespace impl {
namespace gpu {
Expand Down
5 changes: 4 additions & 1 deletion src/gpu/gpu_prelu_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,16 @@

#include "common/compiler_workarounds.hpp"

#include "gpu/generic/sycl/ref_prelu.hpp"
#include "gpu/gpu_impl_list.hpp"

#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
#include "gpu/intel/ocl/ref_prelu.hpp"
#endif

#if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA || DNNL_GPU_VENDOR == DNNL_VENDOR_AMD
#include "gpu/generic/sycl/ref_prelu.hpp"
#endif

namespace dnnl {
namespace impl {
namespace gpu {
Expand Down
4 changes: 3 additions & 1 deletion src/gpu/gpu_reorder_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

#include "gpu/generic/cross_engine_reorder.hpp"
#include "gpu/generic/direct_copy.hpp"
#include "gpu/generic/sycl/ref_reorder.hpp"

#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
#include "gpu/intel/jit/reorder/gen_reorder.hpp"
Expand All @@ -36,6 +35,9 @@
#include "gpu/amd/miopen_reorder.hpp"
#endif

#if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA || DNNL_GPU_VENDOR == DNNL_VENDOR_AMD
#include "gpu/generic/sycl/ref_reorder.hpp"
#endif
namespace dnnl {
namespace impl {
namespace gpu {
Expand Down
6 changes: 4 additions & 2 deletions src/gpu/gpu_resampling_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

#include "gpu/gpu_impl_list.hpp"

#include "gpu/generic/sycl/ref_resampling.hpp"

#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
#include "gpu/intel/ocl/ref_resampling.hpp"
#include "gpu/intel/ocl/vectorized_resampling.hpp"
Expand All @@ -27,6 +25,10 @@
#include "gpu/nvidia/cudnn_resampling.hpp"
#endif

#if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA || DNNL_GPU_VENDOR == DNNL_VENDOR_AMD
#include "gpu/generic/sycl/ref_resampling.hpp"
#endif

namespace dnnl {
namespace impl {
namespace gpu {
Expand Down
6 changes: 4 additions & 2 deletions src/gpu/gpu_shuffle_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@

#include "gpu/gpu_impl_list.hpp"

#include "gpu/generic/sycl/ref_shuffle.hpp"

#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
#include "gpu/intel/ocl/ref_shuffle.hpp"
#include "gpu/intel/ocl/shuffle_by_reorder.hpp"
#endif

#if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA || DNNL_GPU_VENDOR == DNNL_VENDOR_AMD
#include "gpu/generic/sycl/ref_shuffle.hpp"
#endif

namespace dnnl {
namespace impl {
namespace gpu {
Expand Down
6 changes: 4 additions & 2 deletions src/gpu/gpu_softmax_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

#include "gpu/gpu_impl_list.hpp"

#include "gpu/generic/sycl/ref_softmax.hpp"

#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
#include "gpu/intel/ocl/gen9_softmax.hpp"
#include "gpu/intel/ocl/reusable_softmax.hpp"
Expand All @@ -32,6 +30,10 @@
#include "gpu/amd/miopen_softmax.hpp"
#endif

#if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA || DNNL_GPU_VENDOR == DNNL_VENDOR_AMD
#include "gpu/generic/sycl/ref_softmax.hpp"
#endif

namespace dnnl {
namespace impl {
namespace gpu {
Expand Down
7 changes: 5 additions & 2 deletions src/gpu/gpu_sum_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
#include "gpu/gpu_sum_pd.hpp"

#include "gpu/generic/ref_sum.hpp"
#include "gpu/generic/sycl/ref_sum.hpp"
#include "gpu/generic/sycl/ref_sum_many_inputs.hpp"

#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
#include "gpu/intel/jit/gen9_simple_sum.hpp"
Expand All @@ -35,6 +33,11 @@
#include "gpu/nvidia/cudnn_sum.hpp"
#endif

#if DNNL_GPU_VENDOR == DNNL_VENDOR_NVIDIA || DNNL_GPU_VENDOR == DNNL_VENDOR_AMD
#include "gpu/generic/sycl/ref_sum.hpp"
#include "gpu/generic/sycl/ref_sum_many_inputs.hpp"
#endif

namespace dnnl {
namespace impl {
namespace gpu {
Expand Down

0 comments on commit fe16e2e

Please sign in to comment.