From 615236ce07261b622c5fe10de91e2832c4c4e985 Mon Sep 17 00:00:00 2001 From: Dmitrii Zarukin Date: Sat, 9 Sep 2023 12:24:20 -0700 Subject: [PATCH] gpu: nvidia, amd: switch a default internal stream to in-order --- src/gpu/amd/README.md | 12 +++++------- src/gpu/amd/sycl_hip_stream.cpp | 5 ++++- src/gpu/nvidia/README.md | 17 ++++++----------- src/gpu/nvidia/sycl_cuda_stream.cpp | 8 ++++---- 4 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/gpu/amd/README.md b/src/gpu/amd/README.md index e4a1fcd94bd..27f34f901f8 100644 --- a/src/gpu/amd/README.md +++ b/src/gpu/amd/README.md @@ -2,13 +2,11 @@ ## General information -Support for AMD backend is implemented via SYCL HIP backend. The feature is disabled -by default. Users must enable it at build time with a CMake option `DNNL_GPU_VENDOR=AMD`. -The AMD GPUs can be used via oneDNN engine abstraction. The engine should be created using -`dnnl::engine::kind::gpu` engine kind or the user can provide a `sycl::device` objects that -corresponds to AMD GPUs. The stream in AMD backend defines an out-of-order SYCL queue by default. -Similar to the existing oneDNN API, user can specify an in-order queue when creating -a stream if needed. +Support for AMD backend is implemented via SYCL HIP backend. The feature is +disabled by default. Users must enable it at build time with a CMake option +`DNNL_GPU_VENDOR=AMD`. The AMD GPUs can be used via oneDNN engine abstraction. +The engine should be created using `dnnl::engine::kind::gpu` engine kind or the +user can provide a `sycl::device` object that corresponds to AMD GPUs. 
## Pre-requisites * [oneAPI DPC++ Compiler with support for HIP AMD](https://github.com/intel/llvm/blob/sycl/sycl/doc/GetStartedGuide.md#build-dpc-toolchain-with-support-for-hip-amd), version [2022-12](https://github.com/intel/llvm/releases/tag/2022-12) diff --git a/src/gpu/amd/sycl_hip_stream.cpp b/src/gpu/amd/sycl_hip_stream.cpp index cc99e3dfc59..dc5e62b85ad 100644 --- a/src/gpu/amd/sycl_hip_stream.cpp +++ b/src/gpu/amd/sycl_hip_stream.cpp @@ -72,7 +72,10 @@ status_t sycl_hip_stream_t::init() { if (!queue_) { auto &sycl_ctx = sycl_engine.context(); auto &sycl_dev = sycl_engine.device(); - queue_.reset(new ::sycl::queue(sycl_ctx, sycl_dev)); + ::sycl::property_list prop_list; + if (flags() & stream_flags::in_order) + prop_list = {::sycl::property::queue::in_order {}}; + queue_.reset(new ::sycl::queue(sycl_ctx, sycl_dev, prop_list)); } else { // We need to check that the given queue is associated with // the device and context of the engine. diff --git a/src/gpu/nvidia/README.md b/src/gpu/nvidia/README.md index 0f26500da0f..45e2be0aebc 100644 --- a/src/gpu/nvidia/README.md +++ b/src/gpu/nvidia/README.md @@ -2,17 +2,12 @@ ## General information -The Nvidia backend for oneDNN can be exposed to the user via the -`dnnl::engine::kind::gpu` engine kind. Currently, for the case when user's -system has both Intel and Nvidia GPUs, `DNNL_GPU_VENDOR=NVIDIA` flag is used in -CMake, since the devices are clustered based on the device vendor ID and index -pattern can not be used to distinguish between Intel GPU and Nvidia GPU. -However, Intel is working on restructuring the engine creation, so that it would -be possible to choose engine kind and vendor kind at runtime. Also, it is -possible to create oneDNN engines using `sycl::device` objects corresponding to -Nvidia GPUs. The stream in Nvidia backend for oneDNN defines an out-of-order -SYCL queue by default. Similar to the existing oneDNN API, user can specify an -in-order queue when creating a stream if needed. 
+Support for Nvidia backend is implemented via SYCL CUDA backend. The feature is +disabled by default. Users must enable it at build time with a CMake option +`DNNL_GPU_VENDOR=NVIDIA`. The Nvidia GPUs can be used via oneDNN engine +abstraction. The engine should be created using `dnnl::engine::kind::gpu` engine +kind or the user can provide a `sycl::device` object that corresponds to Nvidia +GPUs. ## Pre-requisites * [oneAPI DPC++ Compiler with support for CUDA](https://github.com/intel/llvm/blob/sycl/sycl/doc/GetStartedGuide.md#build-dpc-toolchain-with-support-for-nvidia-cuda) diff --git a/src/gpu/nvidia/sycl_cuda_stream.cpp b/src/gpu/nvidia/sycl_cuda_stream.cpp index 502e95e7f16..5cf8e069f37 100644 --- a/src/gpu/nvidia/sycl_cuda_stream.cpp +++ b/src/gpu/nvidia/sycl_cuda_stream.cpp @@ -73,10 +73,10 @@ status_t sycl_cuda_stream_t::init() { if (!queue_) { auto &sycl_ctx = sycl_engine.context(); auto &sycl_dev = sycl_engine.device(); - // Use `::sycl::property_list {::sycl::property::queue::in_order {}}` as - // third argument in `::sycl::queue` ctor to convert a queue into - // in-order one. - queue_.reset(new ::sycl::queue(sycl_ctx, sycl_dev)); + ::sycl::property_list prop_list; + if (flags() & stream_flags::in_order) + prop_list = {::sycl::property::queue::in_order {}}; + queue_.reset(new ::sycl::queue(sycl_ctx, sycl_dev, prop_list)); } else { auto sycl_dev = queue().get_device(); bool args_ok