From 615236ce07261b622c5fe10de91e2832c4c4e985 Mon Sep 17 00:00:00 2001 From: Dmitrii Zarukin Date: Sat, 9 Sep 2023 12:24:20 -0700 Subject: [PATCH] gpu: nvidia, amd: switch a default internal stream to in-order --- src/gpu/amd/README.md | 12 +++++------- src/gpu/amd/sycl_hip_stream.cpp | 5 ++++- src/gpu/nvidia/README.md | 17 ++++++----------- src/gpu/nvidia/sycl_cuda_stream.cpp | 8 ++++---- 4 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/gpu/amd/README.md b/src/gpu/amd/README.md index e4a1fcd94bd..27f34f901f8 100644 --- a/src/gpu/amd/README.md +++ b/src/gpu/amd/README.md @@ -2,13 +2,11 @@ ## General information -Support for AMD backend is implemented via SYCL HIP backend. The feature is disabled -by default. Users must enable it at build time with a CMake option `DNNL_GPU_VENDOR=AMD`. -The AMD GPUs can be used via oneDNN engine abstraction. The engine should be created using -`dnnl::engine::kind::gpu` engine kind or the user can provide a `sycl::device` objects that -corresponds to AMD GPUs. The stream in AMD backend defines an out-of-order SYCL queue by default. -Similar to the existing oneDNN API, user can specify an in-order queue when creating -a stream if needed. +Support for AMD backend is implemented via SYCL HIP backend. The feature is +disabled by default. Users must enable it at build time with a CMake option +`DNNL_GPU_VENDOR=AMD`. The AMD GPUs can be used via oneDNN engine abstraction. +The engine should be created using `dnnl::engine::kind::gpu` engine kind or the +user can provide a `sycl::device` object that corresponds to AMD GPUs. 
## Pre-requisites * [oneAPI DPC++ Compiler with support for HIP AMD](https://github.com/intel/llvm/blob/sycl/sycl/doc/GetStartedGuide.md#build-dpc-toolchain-with-support-for-hip-amd), version [2022-12](https://github.com/intel/llvm/releases/tag/2022-12) diff --git a/src/gpu/amd/sycl_hip_stream.cpp b/src/gpu/amd/sycl_hip_stream.cpp index cc99e3dfc59..dc5e62b85ad 100644 --- a/src/gpu/amd/sycl_hip_stream.cpp +++ b/src/gpu/amd/sycl_hip_stream.cpp @@ -72,7 +72,10 @@ status_t sycl_hip_stream_t::init() { if (!queue_) { auto &sycl_ctx = sycl_engine.context(); auto &sycl_dev = sycl_engine.device(); - queue_.reset(new ::sycl::queue(sycl_ctx, sycl_dev)); + ::sycl::property_list prop_list; + if (flags() & stream_flags::in_order) + prop_list = {::sycl::property::queue::in_order {}}; + queue_.reset(new ::sycl::queue(sycl_ctx, sycl_dev, prop_list)); } else { // We need to check that the given queue is associated with // the device and context of the engine. diff --git a/src/gpu/nvidia/README.md b/src/gpu/nvidia/README.md index 0f26500da0f..45e2be0aebc 100644 --- a/src/gpu/nvidia/README.md +++ b/src/gpu/nvidia/README.md @@ -2,17 +2,12 @@ ## General information -The Nvidia backend for oneDNN can be exposed to the user via the -`dnnl::engine::kind::gpu` engine kind. Currently, for the case when user's -system has both Intel and Nvidia GPUs, `DNNL_GPU_VENDOR=NVIDIA` flag is used in -CMake, since the devices are clustered based on the device vendor ID and index -pattern can not be used to distinguish between Intel GPU and Nvidia GPU. -However, Intel is working on restructuring the engine creation, so that it would -be possible to choose engine kind and vendor kind at runtime. Also, it is -possible to create oneDNN engines using `sycl::device` objects corresponding to -Nvidia GPUs. The stream in Nvidia backend for oneDNN defines an out-of-order -SYCL queue by default. Similar to the existing oneDNN API, user can specify an -in-order queue when creating a stream if needed. 
+Support for Nvidia backend is implemented via SYCL CUDA backend. The feature is +disabled by default. Users must enable it at build time with a CMake option +`DNNL_GPU_VENDOR=NVIDIA`. The Nvidia GPUs can be used via oneDNN engine +abstraction. The engine should be created using `dnnl::engine::kind::gpu` engine +kind or the user can provide a `sycl::device` object that corresponds to Nvidia +GPUs. ## Pre-requisites * [oneAPI DPC++ Compiler with support for CUDA](https://github.com/intel/llvm/blob/sycl/sycl/doc/GetStartedGuide.md#build-dpc-toolchain-with-support-for-nvidia-cuda) diff --git a/src/gpu/nvidia/sycl_cuda_stream.cpp b/src/gpu/nvidia/sycl_cuda_stream.cpp index 502e95e7f16..5cf8e069f37 100644 --- a/src/gpu/nvidia/sycl_cuda_stream.cpp +++ b/src/gpu/nvidia/sycl_cuda_stream.cpp @@ -73,10 +73,10 @@ status_t sycl_cuda_stream_t::init() { if (!queue_) { auto &sycl_ctx = sycl_engine.context(); auto &sycl_dev = sycl_engine.device(); - // Use `::sycl::property_list {::sycl::property::queue::in_order {}}` as - // third argument in `::sycl::queue` ctor to convert a queue into - // in-order one. - queue_.reset(new ::sycl::queue(sycl_ctx, sycl_dev)); + ::sycl::property_list prop_list; + if (flags() & stream_flags::in_order) + prop_list = {::sycl::property::queue::in_order {}}; + queue_.reset(new ::sycl::queue(sycl_ctx, sycl_dev, prop_list)); } else { auto sycl_dev = queue().get_device(); bool args_ok