gpu: add nvidia support

oneapi-src · Nov 25, 2020 · 5d63af1 · 5d63af1
1 parent e73665f
commit 5d63af1
Show file tree

Hide file tree

Showing 124 changed files with 12,919 additions and 31 deletions.
diff --git a/LICENSE b/LICENSE
@@ -180,6 +180,7 @@
    Copyright 2016-2020 Intel Corporation
    Copyright 2018 YANDEX LLC
    Copyright 2020 Arm Limited and affiliates
+   Copyright 2020 Codeplay Software Limited
    Copyright 2019-2020 FUJITSU LIMITED
 
    Licensed under the Apache License, Version 2.0 (the "License");

diff --git a/README.md b/README.md
@@ -21,6 +21,7 @@ The library is optimized for Intel Architecture Processors, Intel Processor
 Graphics and Xe architecture-based Graphics. oneDNN has experimental support
 for the following architectures:
 * Arm\* 64-bit Architecture (AArch64)
+* NVIDIA\* GPU
 * OpenPOWER\* Power ISA (PPC64)
 * IBMz\* (s390x)
 
@@ -190,6 +191,18 @@ is enabled:
     * [Intel oneAPI DPC++ Compiler](https://software.intel.com/en-us/oneapi/dpc-compiler) Beta
     * OpenCL runtime library (OpenCL version 1.2 or later)
     * [oneAPI Level Zero](https://github.com/oneapi-src/level-zero)
+* DPCPP runtime with NVIDIA GPU support requires
+    * [oneAPI DPC++ Compiler](https://github.com/intel/llvm)
+    * OpenCL runtime library (OpenCL version 1.2 or later)
+    * NVIDIA CUDA\* driver
+    * cuBLAS 10.1 or later
+    * cuDNN 7.6 or later
+
+> **WARNING**
+>
+> NVIDIA GPU support is experimental. General information, build instructions,
+> and implementation limitations are available in the
+> [NVIDIA backend readme](https://github.com/oneapi-src/oneDNN/blob/master/src/gpu/nvidia/README.md).
 
 ### Runtime Dependencies
 

diff --git a/THIRD-PARTY-PROGRAMS b/THIRD-PARTY-PROGRAMS
@@ -178,6 +178,7 @@ Copyright (c) 2015-2017 Martin Hensel
 Copyright (c) 2007, Apostolos Syropoulos (<[email protected])
 
 ComputeCPP SDK (cmake/FindComputeCpp.cmake)
+Copyright 2016-2018 Codeplay Software Ltd.
 Xbyak_aarch64 (src/cpu/aarch64/xbyak_aarch64/)
 Copyright 2019-2020 FUJITSU LIMITED
 

diff --git a/cmake/FindPI_CUDA.cmake b/cmake/FindPI_CUDA.cmake
@@ -0,0 +1,45 @@
+#===============================================================================
+# Copyright 2020 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+# Locates the pi_cuda library and, when it is found, defines the imported
+# target PI_CUDA::PI_CUDA pointing at it.
+#
+# Result variables:
+#   PI_CUDA_FOUND     - set by find_package_handle_standard_args()
+#   PI_CUDA_LIBRARIES - cache entry holding the path to the library
+
+# find_package_handle_standard_args() comes from this module; include it here
+# so this find module does not rely on an earlier include elsewhere.
+include(FindPackageHandleStandardArgs)
+
+find_library(PI_CUDA_LIBRARIES
+    NAMES pi_cuda libpi_cuda.so
+    PATH_SUFFIXES lib)
+
+find_package_handle_standard_args(PI_CUDA REQUIRED_VARS PI_CUDA_LIBRARIES)
+
+# Mark the cache entry as advanced before the early return below so that it is
+# hidden on every code path.
+mark_as_advanced(PI_CUDA_LIBRARIES)
+
+# Nothing more to do when the library is missing or the target already exists.
+if(TARGET PI_CUDA::PI_CUDA OR NOT PI_CUDA_FOUND)
+    return()
+endif()
+
+add_library(PI_CUDA::PI_CUDA UNKNOWN IMPORTED)
+set_target_properties(PI_CUDA::PI_CUDA PROPERTIES
+    IMPORTED_LOCATION "${PI_CUDA_LIBRARIES}")
diff --git a/cmake/FindcuBLAS.cmake b/cmake/FindcuBLAS.cmake
@@ -0,0 +1,60 @@
+#===============================================================================
+# Copyright 2020 Intel Corporation
+# Copyright 2020 Codeplay Software Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+# Locates cuBLAS, the CUDA toolkit and the CUDA driver library, and defines the
+# imported target cuBLAS::cuBLAS once all of them have been found.
+#
+# Variables set by this module:
+#   CUBLAS_INCLUDE_DIR  - directory containing cublas_v2.h
+#   CUBLAS_LIBRARY      - path to the cublas library
+#   CUDA_DRIVER_LIBRARY - path to the cuda library
+
+find_package(CUDA 10.0 REQUIRED)
+find_package(Threads REQUIRED)
+
+# Prefer the toolkit located by find_package(CUDA) above so that the header and
+# the library come from the same installation.
+find_path(CUBLAS_INCLUDE_DIR "cublas_v2.h"
+          HINTS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUDA_INCLUDE_DIRS})
+find_library(CUBLAS_LIBRARY cublas
+             HINTS ${CUDA_TOOLKIT_ROOT_DIR}
+             PATH_SUFFIXES lib lib64 bin)
+find_library(CUDA_DRIVER_LIBRARY cuda)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(cuBLAS
+    REQUIRED_VARS
+        CUBLAS_INCLUDE_DIR
+        CUDA_INCLUDE_DIRS
+        CUBLAS_LIBRARY
+        CUDA_LIBRARIES
+        CUDA_DRIVER_LIBRARY
+)
+
+# Create the target only after a successful search so that it never carries
+# *-NOTFOUND paths. Both spellings of the result variable are checked because
+# some older CMake releases only set the upper-case one.
+if((cuBLAS_FOUND OR CUBLAS_FOUND) AND NOT TARGET cuBLAS::cuBLAS)
+    add_library(cuBLAS::cuBLAS SHARED IMPORTED)
+    set_target_properties(cuBLAS::cuBLAS PROPERTIES
+        IMPORTED_LOCATION "${CUBLAS_LIBRARY}"
+        INTERFACE_INCLUDE_DIRECTORIES
+        "${CUBLAS_INCLUDE_DIR};${CUDA_INCLUDE_DIRS}"
+        INTERFACE_LINK_LIBRARIES
+        "Threads::Threads;${CUDA_DRIVER_LIBRARY};${CUDA_LIBRARIES}"
+        INTERFACE_COMPILE_DEFINITIONS CUDA_NO_HALF)
+endif()
diff --git a/cmake/FindcuDNN.cmake b/cmake/FindcuDNN.cmake
@@ -0,0 +1,66 @@
+#===============================================================================
+# Copyright 2020 Intel Corporation
+# Copyright 2020 Codeplay Software Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+# Locates cuDNN, the CUDA toolkit and the CUDA driver library, and defines the
+# imported target cuDNN::cuDNN once all of them have been found.
+#
+# Variables set by this module:
+#   CUDNN_INCLUDE_DIR   - directory containing cudnn.h
+#   CUDNN_LIBRARY       - path to the cudnn library
+#   CUDA_DRIVER_LIBRARY - path to the cuda library
+
+find_package(CUDA 10.0 REQUIRED)
+find_package(Threads REQUIRED)
+
+find_path(CUDNN_INCLUDE_DIR "cudnn.h"
+          HINTS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUDA_INCLUDE_DIRS})
+# One lookup only: a second find_library() call for the same cache variable is
+# skipped once the first one has succeeded, so the hints go on this call.
+find_library(
+    CUDNN_LIBRARY cudnn
+    HINTS ${CUDA_TOOLKIT_ROOT_DIR}
+    PATH_SUFFIXES lib lib64 bin)
+find_library(CUDA_DRIVER_LIBRARY cuda)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(cuDNN
+    REQUIRED_VARS
+        CUDNN_INCLUDE_DIR
+        CUDA_INCLUDE_DIRS
+        CUDNN_LIBRARY
+        CUDA_LIBRARIES
+        CUDA_DRIVER_LIBRARY
+)
+
+# Create the target only after a successful search so that it never carries
+# *-NOTFOUND paths. Both spellings of the result variable are checked because
+# some older CMake releases only set the upper-case one.
+#
+# CUDA_NO_HALF is a workaround that avoids the half type being created by both
+# CUDA and SYCL.
+if((cuDNN_FOUND OR CUDNN_FOUND) AND NOT TARGET cuDNN::cuDNN)
+  add_library(cuDNN::cuDNN SHARED IMPORTED)
+  set_target_properties(cuDNN::cuDNN PROPERTIES
+      IMPORTED_LOCATION
+      "${CUDNN_LIBRARY}"
+      INTERFACE_INCLUDE_DIRECTORIES
+      "${CUDA_INCLUDE_DIRS};${CUDNN_INCLUDE_DIR}"
+      INTERFACE_LINK_LIBRARIES
+      "Threads::Threads;${CUDA_DRIVER_LIBRARY};${CUDA_LIBRARIES}"
+      INTERFACE_COMPILE_DEFINITIONS
+      CUDA_NO_HALF)
+endif()
diff --git a/cmake/options.cmake b/cmake/options.cmake
@@ -153,6 +153,15 @@ if(NOT "${DNNL_GPU_RUNTIME}" MATCHES "^(OCL|NONE|DPCPP|SYCL)$")
     message(FATAL_ERROR "Unsupported GPU runtime: ${DNNL_GPU_RUNTIME}")
 endif()
 
+set(DNNL_GPU_VENDOR "INTEL" CACHE STRING
+    "specifies target GPU vendor for GPU engines.
+    Can be INTEL (default) or NVIDIA.")
+# Advertise the accepted values so that cmake-gui and ccmake offer a selection.
+set_property(CACHE DNNL_GPU_VENDOR PROPERTY STRINGS INTEL NVIDIA)
+if(NOT "${DNNL_GPU_VENDOR}" MATCHES "^(INTEL|NVIDIA)$")
+    message(FATAL_ERROR "Unsupported GPU vendor: ${DNNL_GPU_VENDOR}")
+endif()
+
 set(OPENCLROOT "" CACHE STRING
     "path to Intel SDK for OpenCL applications.
     Use this option to specify custom location for OpenCL.")
@@ -167,6 +176,11 @@ endif()
 
 if(DNNL_GPU_RUNTIME STREQUAL "DPCPP" OR DNNL_GPU_RUNTIME STREQUAL "SYCL")
     set(DNNL_GPU_SYCL true)
+    # DNNL_SYCL_CUDA is ON only for a SYCL GPU runtime targeting NVIDIA.
+    set(DNNL_SYCL_CUDA OFF)
+    if(DNNL_GPU_VENDOR STREQUAL "NVIDIA")
+        set(DNNL_SYCL_CUDA ON)
+    endif()
 else()
     set(DNNL_GPU_SYCL false)
 endif()

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -61,6 +61,12 @@ if(DNNL_CPU_SYCL)
     endforeach()
 endif()
 
+# Skip every example for CUDA: USM is the default model for the library and it
+# is not yet supported by the NVIDIA backend.
+if(DNNL_SYCL_CUDA)
+    # Empty the list in one step instead of removing the entries one by one.
+    set(sources "")
+endif()
+
 foreach(src ${sources})
     file(RELATIVE_PATH src_rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${src})
     string(REGEX REPLACE "[/_\\.]" "-" example_name ${src_rel_path})

diff --git a/src/common/dnnl_thread.hpp b/src/common/dnnl_thread.hpp
@@ -130,7 +130,7 @@ inline int dnnl_get_current_num_threads() {
     return tbb::this_task_arena::max_concurrency();
 #elif DNNL_CPU_THREADING_RUNTIME == DNNL_RUNTIME_THREADPOOL
     using namespace dnnl::impl::threadpool_utils;
-    dnnl::threadpool_iface *tp = get_active_threadpool();
+    dnnl::threadpool_interop::threadpool_iface *tp = get_active_threadpool();
     return (tp) ? dnnl_get_max_threads() : 1;
 #else
     return 1;

diff --git a/src/common/memory_tracking.hpp b/src/common/memory_tracking.hpp
@@ -177,6 +177,11 @@ enum {
     key_conv_amx_wsp_buffer,
     key_conv_bia_reduction,
     key_conv_bias_bf16_convert_wsp,
+    key_conv_cudnn,
+    key_conv_cudnn_algo,
+    key_conv_cudnn_filter,
+    key_conv_cudnn_temp,
+    key_conv_dst_bf16_convert_wsp,
     key_conv_bwd_w_1st_bia_reorder,
     key_conv_bwd_w_1st_wei_reorder,
     key_conv_gemm_acc,

diff --git a/src/cpu/cpu_stream.hpp b/src/cpu/cpu_stream.hpp
@@ -55,7 +55,6 @@ struct cpu_stream_t : public stream_t {
         threadpool_utils::deactivate_threadpool();
     }
 #endif
-
 };
 
 } // namespace cpu

diff --git a/src/gpu/CMakeLists.txt b/src/gpu/CMakeLists.txt
@@ -33,3 +33,8 @@ set_property(GLOBAL APPEND PROPERTY DNNL_LIB_DEPS
 add_subdirectory(compute)
 add_subdirectory(jit)
 add_subdirectory(ocl)
+if(DNNL_SYCL_CUDA)
+    add_subdirectory(nvidia)
+    # Forward ${LIB_NAME}_INTERFACE (set by nvidia/) one level up for linking
+    set(${LIB_NAME}_INTERFACE "${${LIB_NAME}_INTERFACE}" PARENT_SCOPE)
+endif()
diff --git a/src/gpu/nvidia/CMakeLists.txt b/src/gpu/nvidia/CMakeLists.txt
@@ -0,0 +1,62 @@
+#===============================================================================
+# Copyright 2020 Intel Corporation
+# Copyright 2020 Codeplay Software Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+# Compiles the sources of this directory into an object library and exposes the
+# libraries it has to be linked against through an interface library.
+
+file(GLOB_RECURSE SOURCES
+    ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
+    )
+
+set(OBJ_LIB ${LIB_NAME}_sycl_nvidia)
+add_library(${OBJ_LIB} OBJECT ${SOURCES})
+
+find_package(OpenCL REQUIRED)
+
+# Forward the usage requirements of the imported targets to the object library.
+# The target_* commands append to the target's properties; assigning
+# COMPILE_OPTIONS through set_target_properties() would instead discard the
+# options the target was initialized with from the directory scope.
+target_compile_definitions(
+    ${OBJ_LIB}
+    PRIVATE $<TARGET_PROPERTY:cuBLAS::cuBLAS,INTERFACE_COMPILE_DEFINITIONS>
+            $<TARGET_PROPERTY:cuDNN::cuDNN,INTERFACE_COMPILE_DEFINITIONS>)
+target_compile_options(
+    ${OBJ_LIB}
+    PRIVATE $<TARGET_PROPERTY:cuBLAS::cuBLAS,INTERFACE_COMPILE_OPTIONS>
+            $<TARGET_PROPERTY:cuDNN::cuDNN,INTERFACE_COMPILE_OPTIONS>
+            $<TARGET_PROPERTY:OpenCL::OpenCL,INTERFACE_COMPILE_OPTIONS>)
+target_include_directories(
+    ${OBJ_LIB}
+    PRIVATE $<TARGET_PROPERTY:OpenCL::OpenCL,INTERFACE_INCLUDE_DIRECTORIES>
+            $<TARGET_PROPERTY:cuDNN::cuDNN,INTERFACE_INCLUDE_DIRECTORIES>
+            $<TARGET_PROPERTY:cuBLAS::cuBLAS,INTERFACE_INCLUDE_DIRECTORIES>)
+
+# Consumers link against the interface library to pick up these dependencies.
+add_library(${OBJ_LIB}_interface INTERFACE)
+target_link_libraries(${OBJ_LIB}_interface INTERFACE cuBLAS::cuBLAS
+                                                         cuDNN::cuDNN
+                                                         OpenCL::OpenCL)
+# Register the object files in the global DNNL_LIB_DEPS property.
+set_property(GLOBAL APPEND PROPERTY DNNL_LIB_DEPS
+    $<TARGET_OBJECTS:${OBJ_LIB}>)
+
+# Append the interface library to ${LIB_NAME}_INTERFACE in the parent scope.
+set(${LIB_NAME}_INTERFACE
+    ${${LIB_NAME}_INTERFACE} ${OBJ_LIB}_interface
+    PARENT_SCOPE)
-Original file line number
+Diff line change
@@ Expand Up / @@ -55,7 +55,6 @@ struct cpu_stream_t : public stream_t { @@
             threadpool_utils::deactivate_threadpool();
         }
     #endif
     };
     } // namespace cpu
@@ Expand Down @@