Cherry-pick for 1.17.3 #20013

Merged

22 commits merged on Mar 29, 2024

Changes from 21 commits

Commits (22)
fc6436b
fix memory mapping on Windows (#19623)
yufenglee Feb 25, 2024
6772107
Use CMake's find package for CUDA libs (#19673)
gedoensmax Feb 27, 2024
ee6d01c
Update copying API header files (#19736)
mszhanyi Mar 2, 2024
24e83fa
[Apple framework] Fix minimal build with training enabled. (#19858)
edgchen1 Mar 12, 2024
b91354c
fix: "UserWarning: Unsupported Windows version (11). ONNX Runtime sup…
enximi Mar 15, 2024
a13e5d5
skip onnx node_tests for tensorrt ep (#19880)
jywu-msft Mar 13, 2024
247f8c5
Fix broken Pooling CUDA NHWC Ops and ensure NCHW / NHWC parity. (#19889)
mtavenrath Mar 20, 2024
0941cc7
update version number 1.17.3 + a commit for update marker
Mar 21, 2024
674c359
String Tensor SplitToSequence fix (#19942)
Craigacp Mar 20, 2024
d35360e
Fix GQA Rotary Embedding sequence length (#19801)
aciddelgado Mar 6, 2024
42ab62c
fix gqa rotary dim 1 (#19874)
aciddelgado Mar 13, 2024
c0b8f2e
Update replacing MultiHeadAttention with GroupQueryAttention (#19882)
kunal-vaishnavi Mar 13, 2024
0eeeadc
fix build error
tianleiwu Mar 22, 2024
38b8c67
Merge branch 'rel-1.17.3' into yguo/cherry-pick-for-1.17.3
Mar 22, 2024
fe0c113
Merge branch 'yguo/cherry-pick-for-1.17.3' of https://github.com/micr…
Mar 22, 2024
df049b3
Change nuget pipeline's "Windows_Packaging_combined_GPU" job to downl…
snnn Mar 14, 2024
6d96c65
Change nuget pipeline's "Final_Jar_Testing_Windows_GPU" job to downlo…
snnn Mar 14, 2024
6469bb5
Packed QKV and Rotary Embedding Support for sm<80 GQA (#20012)
aciddelgado Mar 23, 2024
e6c3d56
Add LLaMA end-to-end benchmarking (#19985)
kunal-vaishnavi Mar 22, 2024
c9ebded
Remove early stopping from LLaMA end-to-end benchmarking (#20033)
kunal-vaishnavi Mar 22, 2024
ed4edfe
[TensorRT EP] Fix concurrency issue for TRT custom op list (#20093)
chilo-ms Mar 27, 2024
5ac3b6f
Pin Onnx Version (#20073)
mszhanyi Mar 26, 2024
2 changes: 1 addition & 1 deletion VERSION_NUMBER
@@ -1 +1 @@
1.17.2
1.17.3
4 changes: 4 additions & 0 deletions cmake/CMakeLists.txt
@@ -1396,6 +1396,10 @@ endif()
if (onnxruntime_USE_CUDA)
set(CMAKE_CUDA_RUNTIME_LIBRARY Shared)
set(CMAKE_CUDA_STANDARD 17)
if(onnxruntime_CUDA_HOME)
file(TO_CMAKE_PATH CUDAToolkit_ROOT ${onnxruntime_CUDA_HOME})
endif()
find_package(CUDAToolkit REQUIRED)
if(onnxruntime_CUDNN_HOME)
file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME)
endif()
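
For readers following the CUDA build changes cherry-picked here (#19673, "Use CMake's find package for CUDA libs"), a minimal standalone sketch of the find_package(CUDAToolkit) pattern the hunk above adopts. This is not part of the PR; the project and target names are hypothetical.

# Minimal sketch, assuming CMake >= 3.17 and an installed CUDA toolkit.
cmake_minimum_required(VERSION 3.18)
project(cuda_toolkit_demo LANGUAGES CXX CUDA)

# Optional location hint, analogous to onnxruntime_CUDA_HOME above:
#   cmake -DCUDAToolkit_ROOT=/usr/local/cuda-12.2 ..
find_package(CUDAToolkit REQUIRED)

add_executable(demo main.cu)
# Imported CUDA:: targets carry include paths and link options with them,
# so no manual include_directories() or link_directories() are needed.
target_link_libraries(demo PRIVATE CUDA::cudart CUDA::cublas)
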
2 changes: 1 addition & 1 deletion cmake/adjust_global_compile_flags.cmake
@@ -205,7 +205,7 @@ endif()


macro(check_nvcc_compiler_flag _FLAG _RESULT)
execute_process(COMMAND ${onnxruntime_CUDA_HOME}/bin/nvcc "${_FLAG}" RESULT_VARIABLE NVCC_OUT ERROR_VARIABLE NVCC_ERROR)
execute_process(COMMAND ${CUDAToolkit_BIN_DIR}/nvcc "${_FLAG}" RESULT_VARIABLE NVCC_OUT ERROR_VARIABLE NVCC_ERROR)
message("NVCC_ERROR = ${NVCC_ERROR}")
message("NVCC_OUT = ${NVCC_OUT}")
if ("${NVCC_OUT}" MATCHES "0")
3 changes: 1 addition & 2 deletions cmake/external/onnxruntime_external_deps.cmake
@@ -542,16 +542,15 @@ message("Finished fetching external dependencies")
set(onnxruntime_LINK_DIRS )
if (onnxruntime_USE_CUDA)
#TODO: combine onnxruntime_CUDNN_HOME and onnxruntime_CUDA_HOME, assume they are the same
find_package(CUDAToolkit REQUIRED)
if (WIN32)
if(onnxruntime_CUDNN_HOME)
list(APPEND onnxruntime_LINK_DIRS ${onnxruntime_CUDNN_HOME}/lib ${onnxruntime_CUDNN_HOME}/lib/x64)
endif()
list(APPEND onnxruntime_LINK_DIRS ${onnxruntime_CUDA_HOME}/x64/lib64)
else()
if(onnxruntime_CUDNN_HOME)
list(APPEND onnxruntime_LINK_DIRS ${onnxruntime_CUDNN_HOME}/lib ${onnxruntime_CUDNN_HOME}/lib64)
endif()
list(APPEND onnxruntime_LINK_DIRS ${onnxruntime_CUDA_HOME}/lib64)
endif()
endif()

53 changes: 30 additions & 23 deletions cmake/onnxruntime_graph.cmake
@@ -7,8 +7,26 @@ file(GLOB_RECURSE onnxruntime_graph_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/graph/*.cc"
)

# create empty list for any excludes
# start with empty training srcs list
set(orttraining_graph_src)

if (onnxruntime_ENABLE_TRAINING_OPS AND NOT onnxruntime_ENABLE_TRAINING)
set(orttraining_graph_src
"${ORTTRAINING_SOURCE_DIR}/core/graph/training_op_defs.cc"
"${ORTTRAINING_SOURCE_DIR}/core/graph/training_op_defs.h"
)
endif()

if (onnxruntime_ENABLE_TRAINING)
file(GLOB_RECURSE orttraining_graph_src CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/core/graph/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/graph/*.cc"
)
endif()

# create empty lists for any excludes
set(onnxruntime_graph_src_exclude_patterns)
set(orttraining_graph_src_exclude_patterns)

if (onnxruntime_MINIMAL_BUILD)
# remove schema registration support
@@ -22,11 +40,18 @@ if (onnxruntime_MINIMAL_BUILD)
"${ONNXRUNTIME_ROOT}/core/graph/contrib_ops/onnx_function_util.cc"
"${ONNXRUNTIME_ROOT}/core/graph/contrib_ops/shape_inference_functions.h"
"${ONNXRUNTIME_ROOT}/core/graph/contrib_ops/shape_inference_functions.cc"
"${ONNXRUNTIME_ROOT}/core/graph/dml_ops/dml_defs.h"
"${ONNXRUNTIME_ROOT}/core/graph/dml_ops/dml_defs.cc"
"${ONNXRUNTIME_ROOT}/core/graph/function_template.h"
"${ONNXRUNTIME_ROOT}/core/graph/function_utils.h"
"${ONNXRUNTIME_ROOT}/core/graph/function_utils.cc"
)

list(APPEND orttraining_graph_src_exclude_patterns
"${ORTTRAINING_SOURCE_DIR}/core/graph/training_op_defs.h"
"${ORTTRAINING_SOURCE_DIR}/core/graph/training_op_defs.cc"
)

# no Function support initially
list(APPEND onnxruntime_graph_src_exclude_patterns
"${ONNXRUNTIME_ROOT}/core/graph/function*"
@@ -64,30 +89,12 @@ endif()
file(GLOB onnxruntime_graph_src_exclude ${onnxruntime_graph_src_exclude_patterns})
list(REMOVE_ITEM onnxruntime_graph_src ${onnxruntime_graph_src_exclude})

file(GLOB_RECURSE onnxruntime_ir_defs_src CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/defs/*.cc"
)

if (onnxruntime_ENABLE_TRAINING_OPS AND NOT onnxruntime_ENABLE_TRAINING)
set(orttraining_graph_src
"${ORTTRAINING_SOURCE_DIR}/core/graph/training_op_defs.cc"
"${ORTTRAINING_SOURCE_DIR}/core/graph/training_op_defs.h"
)
endif()

if (onnxruntime_ENABLE_TRAINING)
file(GLOB_RECURSE orttraining_graph_src CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/core/graph/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/graph/*.cc"
)
endif()

set(onnxruntime_graph_lib_src ${onnxruntime_graph_src} ${onnxruntime_ir_defs_src})
if (onnxruntime_ENABLE_TRAINING_OPS)
list(APPEND onnxruntime_graph_lib_src ${orttraining_graph_src})
file(GLOB orttraining_graph_src_exclude ${orttraining_graph_src_exclude_patterns})
list(REMOVE_ITEM orttraining_graph_src ${orttraining_graph_src_exclude})
endif()

onnxruntime_add_static_library(onnxruntime_graph ${onnxruntime_graph_lib_src})
onnxruntime_add_static_library(onnxruntime_graph ${onnxruntime_graph_src} ${orttraining_graph_src})
add_dependencies(onnxruntime_graph onnx_proto flatbuffers::flatbuffers)
onnxruntime_add_include_to_target(onnxruntime_graph onnxruntime_common ${WIL_TARGET} onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers safeint_interface Boost::mp11)

@@ -120,7 +127,7 @@ endif()

set_target_properties(onnxruntime_graph PROPERTIES FOLDER "ONNXRuntime")
set_target_properties(onnxruntime_graph PROPERTIES LINKER_LANGUAGE CXX)
source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_graph_src} ${onnxruntime_ir_defs_src})
source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_graph_src})
if (onnxruntime_ENABLE_TRAINING_OPS)
source_group(TREE ${ORTTRAINING_ROOT} FILES ${orttraining_graph_src})
endif()
27 changes: 16 additions & 11 deletions cmake/onnxruntime_providers_cuda.cmake
@@ -156,10 +156,16 @@
endif()

add_dependencies(${target} onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
target_link_libraries(${target} PRIVATE cublasLt cublas cudnn curand cufft ${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 safeint_interface)
if(onnxruntime_CUDNN_HOME)
target_include_directories(${target} PRIVATE ${onnxruntime_CUDNN_HOME}/include)
target_link_directories(${target} PRIVATE ${onnxruntime_CUDNN_HOME}/lib)
if(onnxruntime_CUDA_MINIMAL)
target_compile_definitions(${target} PRIVATE USE_CUDA_MINIMAL)
target_link_libraries(${target} PRIVATE ${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 safeint_interface CUDA::cudart)
else()
target_link_libraries(${target} PRIVATE CUDA::cublasLt CUDA::cublas cudnn CUDA::curand CUDA::cufft CUDA::cudart
${ABSEIL_LIBS} ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 safeint_interface)
if(onnxruntime_CUDNN_HOME)
target_include_directories(${target} PRIVATE ${onnxruntime_CUDNN_HOME}/include)
target_link_directories(${target} PRIVATE ${onnxruntime_CUDNN_HOME}/lib)
endif()
endif()

if (onnxruntime_USE_TRITON_KERNEL)
@@ -171,25 +177,24 @@
target_include_directories(${target} PRIVATE ${triton_kernel_header_dir})
target_link_libraries(${target} PUBLIC -Wl,--whole-archive ${triton_kernel_obj_file} -Wl,--no-whole-archive)
# lib cuda needed by cuLaunchKernel
target_link_libraries(${target} PRIVATE cuda)
target_link_libraries(${target} PRIVATE CUDA::cuda_driver)
endif()

include(cutlass)
target_include_directories(${target} PRIVATE ${cutlass_SOURCE_DIR}/include ${cutlass_SOURCE_DIR}/examples)

target_include_directories(${target} PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} ${TVM_INCLUDES} PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
target_include_directories(${target} PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} ${TVM_INCLUDES}
PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
# ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
set_target_properties(${target} PROPERTIES LINKER_LANGUAGE CUDA)
set_target_properties(${target} PROPERTIES FOLDER "ONNXRuntime")

if (onnxruntime_ENABLE_CUDA_PROFILING) # configure cupti for cuda profiling
target_include_directories(${target} PRIVATE ${onnxruntime_CUDA_HOME}/extras/CUPTI/include)
target_link_directories(${target} PRIVATE ${onnxruntime_CUDA_HOME}/extras/CUPTI/lib64)
target_link_libraries(${target} PRIVATE cupti)
target_link_libraries(${target} PRIVATE CUDA::cupti)
endif()

if (onnxruntime_ENABLE_NVTX_PROFILE AND NOT WIN32)
target_link_libraries(${target} PRIVATE nvToolsExt)
if (onnxruntime_ENABLE_NVTX_PROFILE)
target_link_libraries(${target} PRIVATE CUDA::nvtx3)
endif()

if (onnxruntime_ENABLE_TRAINING_OPS)
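
A side note on the hunk above: bare library names such as cublas or cufft only resolve through manually added link directories, while the CUDA:: imported targets from find_package(CUDAToolkit) resolve on their own. A hedged before/after sketch with a made-up target name my_ep; cuDNN stays a bare name because FindCUDAToolkit does not provide a cuDNN target.

# Sketch only; my_ep is a hypothetical stand-in for a provider library.
add_library(my_ep SHARED ep.cc)

# Before: bare names, resolved via link_directories() pointing at CUDA_HOME.
# target_link_libraries(my_ep PRIVATE cublasLt cublas curand cufft cudart)

# After: imported targets, no manual link directories required.
find_package(CUDAToolkit REQUIRED)
target_link_libraries(my_ep PRIVATE
    CUDA::cublasLt CUDA::cublas CUDA::curand CUDA::cufft CUDA::cudart)
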
11 changes: 6 additions & 5 deletions cmake/onnxruntime_providers_tensorrt.cmake
@@ -8,7 +8,7 @@
set(BUILD_LIBRARY_ONLY 1)
add_definitions("-DONNX_ML=1")
add_definitions("-DONNX_NAMESPACE=onnx")
set(CUDA_INCLUDE_DIRS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
set(CUDA_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIRS})
set(TENSORRT_ROOT ${onnxruntime_TENSORRT_HOME})
set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
set(PROTOBUF_LIBRARY ${PROTOBUF_LIB})
@@ -58,7 +58,7 @@
URL_HASH SHA1=${DEP_SHA1_onnx_tensorrt}
)
if (NOT CUDA_INCLUDE_DIR)
set(CUDA_INCLUDE_DIR ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) # onnx-tensorrt repo needs this variable to build
set(CUDA_INCLUDE_DIR ${CUDAToolkit_INCLUDE_DIRS}) # onnx-tensorrt repo needs this variable to build
endif()
# The onnx_tensorrt repo contains a test program, getSupportedAPITest, which doesn't support Windows. It uses
# unistd.h. So we must exclude it from our build. onnxruntime_fetchcontent_makeavailable is for the purpose.
@@ -102,11 +102,12 @@
onnxruntime_add_include_to_target(onnxruntime_providers_tensorrt onnxruntime_common onnx flatbuffers::flatbuffers Boost::mp11 safeint_interface)
add_dependencies(onnxruntime_providers_tensorrt onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
if (onnxruntime_USE_TENSORRT_BUILTIN_PARSER)
target_link_libraries(onnxruntime_providers_tensorrt PRIVATE ${trt_link_libs} cudart ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface ${ABSEIL_LIBS})
target_link_libraries(onnxruntime_providers_tensorrt PRIVATE ${trt_link_libs} ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface ${ABSEIL_LIBS} PUBLIC CUDA::cudart)
else()
target_link_libraries(onnxruntime_providers_tensorrt PRIVATE ${onnxparser_link_libs} ${trt_link_libs} cudart ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers ${ABSEIL_LIBS})
target_link_libraries(onnxruntime_providers_tensorrt PRIVATE ${onnxparser_link_libs} ${trt_link_libs} ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers ${ABSEIL_LIBS} PUBLIC CUDA::cudart)
endif()
target_include_directories(onnxruntime_providers_tensorrt PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS} PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
target_include_directories(onnxruntime_providers_tensorrt PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${eigen_INCLUDE_DIRS}
PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
if(onnxruntime_CUDNN_HOME)
target_include_directories(onnxruntime_providers_tensorrt PRIVATE ${onnxruntime_CUDNN_HOME}/include)
endif()
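
The hunk above links CUDA::cudart with PUBLIC visibility instead of listing cudart among the PRIVATE libraries. A short hedged sketch of the difference, using hypothetical targets ep and app rather than the real provider targets:

# PRIVATE: only ep itself links cudart and sees its usage requirements.
# PUBLIC:  consumers of ep (here app) inherit CUDA::cudart transitively.
add_library(ep SHARED ep.cc)
target_link_libraries(ep PUBLIC CUDA::cudart)

add_executable(app main.cc)
target_link_libraries(app PRIVATE ep)  # app links cudart through ep
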
5 changes: 1 addition & 4 deletions cmake/onnxruntime_python.cmake
@@ -283,10 +283,7 @@ if (WIN32)
get_filename_component(CUDNN_DLL_NAME ${CUDNN_DLL_PATH} NAME_WE)
string(REPLACE "cudnn64_" "" CUDNN_VERSION "${CUDNN_DLL_NAME}")
if(NOT onnxruntime_CUDA_VERSION)
message("Reading json file ${onnxruntime_CUDA_HOME}/version.json")
set(CUDA_SDK_JSON_FILE_PATH "${onnxruntime_CUDA_HOME}/version.json")
file(READ ${CUDA_SDK_JSON_FILE_PATH} CUDA_SDK_JSON_CONTENT)
string(JSON onnxruntime_CUDA_VERSION GET ${CUDA_SDK_JSON_CONTENT} "cuda" "version")
set(onnxruntime_CUDA_VERSION ${CUDAToolkit_VERSION})
message("onnxruntime_CUDA_VERSION=${onnxruntime_CUDA_VERSION}")
endif()
file(APPEND "${VERSION_INFO_FILE}"
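
The hunk above drops the hand-rolled parsing of version.json in favor of a variable that FindCUDAToolkit already defines. A small print-only sketch, outside this PR, of the toolkit variables the build scripts in this cherry-pick rely on:

# Sketch only: print the CUDAToolkit facts used across the hunks above.
find_package(CUDAToolkit REQUIRED)
message(STATUS "CUDA version:    ${CUDAToolkit_VERSION}")      # replaces reading version.json
message(STATUS "nvcc directory:  ${CUDAToolkit_BIN_DIR}")      # used by check_nvcc_compiler_flag
message(STATUS "include dirs:    ${CUDAToolkit_INCLUDE_DIRS}") # replaces CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES
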
4 changes: 2 additions & 2 deletions cmake/onnxruntime_unittests.cmake
@@ -67,7 +67,7 @@ function(AddTest)
if(onnxruntime_USE_CUDA)
#XXX: we should not need to do this. onnxruntime_test_all.exe should not have direct dependency on CUDA DLLs,
# otherwise it will impact when CUDA DLLs can be unloaded.
target_link_libraries(${_UT_TARGET} PRIVATE cudart)
target_link_libraries(${_UT_TARGET} PRIVATE CUDA::cudart)
endif()
target_link_libraries(${_UT_TARGET} PRIVATE ${_UT_LIBS} GTest::gtest GTest::gmock ${onnxruntime_EXTERNAL_LIBRARIES})
endif()
@@ -1275,7 +1275,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
list(APPEND onnxruntime_shared_lib_test_LIBS cpuinfo)
endif()
if (onnxruntime_USE_CUDA)
list(APPEND onnxruntime_shared_lib_test_LIBS cudart)
list(APPEND onnxruntime_shared_lib_test_LIBS CUDA::cudart)
endif()
if (onnxruntime_USE_TENSORRT)
list(APPEND onnxruntime_shared_lib_test_LIBS ${TENSORRT_LIBRARY_INFER})
5 changes: 5 additions & 0 deletions docs/python/README.rst
@@ -6,6 +6,11 @@ For more information on ONNX Runtime, please see `aka.ms/onnxruntime <https://ak


Changes

1.17.3
^^^^^^

Release Notes : https://github.com/Microsoft/onnxruntime/releases/tag/v1.17.3
-------

1.17.2
2 changes: 1 addition & 1 deletion js/common/lib/version.ts
@@ -4,4 +4,4 @@
// This file is generated by /js/scripts/update-version.ts
// Do not modify file content manually.

export const version = '1.17.2';
export const version = '1.17.3';
4 changes: 2 additions & 2 deletions js/common/package-lock.json

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion js/common/package.json
@@ -2,7 +2,7 @@
"license": "MIT",
"type": "module",
"name": "onnxruntime-common",
"version": "1.17.2",
"version": "1.17.3",
"repository": {
"url": "https://github.com/Microsoft/onnxruntime.git",
"type": "git"
2 changes: 1 addition & 1 deletion js/node/lib/version.ts
@@ -4,4 +4,4 @@
// This file is generated by /js/scripts/update-version.ts
// Do not modify file content manually.

export const version = '1.17.2';
export const version = '1.17.3';
2 changes: 1 addition & 1 deletion js/node/package-lock.json

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion js/node/package.json
@@ -13,7 +13,7 @@
3
]
},
"version": "1.17.2",
"version": "1.17.3",
"dependencies": {
"onnxruntime-common": "file:../common"
},
2 changes: 1 addition & 1 deletion js/react_native/lib/version.ts
@@ -4,4 +4,4 @@
// This file is generated by /js/scripts/update-version.ts
// Do not modify file content manually.

export const version = '1.17.2';
export const version = '1.17.3';
2 changes: 1 addition & 1 deletion js/react_native/package.json
@@ -36,7 +36,7 @@
"registry": "https://registry.npmjs.org/"
},
"source": "lib/index",
"version": "1.17.2",
"version": "1.17.3",
"main": "dist/commonjs/index",
"homepage": "https://github.com/microsoft/onnxruntime/blob/main/js/react_native/README.md",
"files": [
2 changes: 1 addition & 1 deletion js/react_native/yarn.lock
@@ -5254,7 +5254,7 @@ onetime@^5.1.0, onetime@^5.1.2:
mimic-fn "^2.1.0"

"onnxruntime-common@file:../common":
version "1.17.2"
version "1.17.3"

open@^6.2.0:
version "6.4.0"
2 changes: 1 addition & 1 deletion js/web/lib/version.ts
@@ -4,4 +4,4 @@
// This file is generated by /js/scripts/update-version.ts
// Do not modify file content manually.

export const version = '1.17.2';
export const version = '1.17.3';
2 changes: 1 addition & 1 deletion js/web/package-lock.json

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion onnxruntime/__init__.py
@@ -7,7 +7,7 @@
For more information on ONNX Runtime, please see `aka.ms/onnxruntime <https://aka.ms/onnxruntime/>`_
or the `Github project <https://github.com/microsoft/onnxruntime/>`_.
"""
__version__ = "1.17.2"
__version__ = "1.17.3"
__author__ = "Microsoft"

# we need to do device version validation (for example to check Cuda version for an onnxruntime-training package).
1 change: 1 addition & 0 deletions onnxruntime/contrib_ops/cpu/bert/attention_common.h
@@ -96,6 +96,7 @@ struct GroupQueryAttentionParameters {
int kv_hidden_size;
int kv_num_heads;
int num_splits; // number of splits for splitkv
int rotary_dim; // rotary embedding dimension
bool is_unidirectional; // causal
int local_window_size;
bool kv_share_buffer;
@@ -371,6 +371,7 @@ Status mha_fwd_kvcache(const cudaDeviceProp& dprops,
int seqlen_q,
int seqlen_k,
int seqlen_k_new,
int rotary_dim,
const float softmax_scale,
bool is_causal,
bool is_bf16,
@@ -448,7 +449,7 @@ Status mha_fwd_kvcache(const cudaDeviceProp& dprops,
params.rotary_cos_ptr = rotary_cos;
params.rotary_sin_ptr = rotary_sin;
params.is_rotary_interleaved = is_rotary_interleaved;
params.rotary_dim = (head_size / 16) * 16;
params.rotary_dim = rotary_dim;
}

params.num_splits = num_splits;
@@ -96,6 +96,7 @@ Status mha_fwd_kvcache(const cudaDeviceProp& dprops,
int seqlen_q,
int seqlen_k,
int seqlen_k_new,
int rotary_dim,
const float softmax_scale,
bool is_causal,
bool is_bf16,