Skip to content

Commit

Permalink
[feat][index] Add simd cpu instruction set. Automatically select the CPU
Browse files Browse the repository at this point in the history
instruction set according to the running environment.
  • Loading branch information
Haijun Yu authored and ketor committed May 17, 2024
1 parent 271de91 commit c5d452e
Show file tree
Hide file tree
Showing 19 changed files with 2,271 additions and 3 deletions.
57 changes: 57 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,63 @@ endif()

list(REMOVE_ITEM SERVER_SRCS "${PROJECT_SOURCE_DIR}/src/server/main.cc")

include(CheckSymbolExists)

macro(detect_target_arch)
check_symbol_exists(__aarch64__ "" __AARCH64)
check_symbol_exists(__x86_64__ "" __X86_64)

if(NOT __AARCH64 AND NOT __X86_64)
message(FATAL "dingodb only support amd64 and arm64.")
endif()
endmacro()

detect_target_arch()

if(__X86_64)
set(SIMD_UTILS_SRC ${PROJECT_SOURCE_DIR}/src/simd/distances_ref.cc
${PROJECT_SOURCE_DIR}/src/simd/hook.cc)
set(SIMD_UTILS_SSE_SRC ${PROJECT_SOURCE_DIR}/src/simd/distances_sse.cc)
set(SIMD_UTILS_AVX_SRC ${PROJECT_SOURCE_DIR}/src/simd/distances_avx.cc)
set(SIMD_UTILS_AVX512_SRC ${PROJECT_SOURCE_DIR}/src/simd/distances_avx512.cc)

add_library(simd_utils_sse OBJECT ${SIMD_UTILS_SSE_SRC})
add_library(simd_utils_avx OBJECT ${SIMD_UTILS_AVX_SRC})
add_library(simd_utils_avx512 OBJECT ${SIMD_UTILS_AVX512_SRC})

target_compile_options(simd_utils_sse PRIVATE -msse4.2)
target_compile_options(simd_utils_avx PRIVATE -mf16c -mavx2)
target_compile_options(simd_utils_avx512 PRIVATE -mf16c -mavx512f -mavx512dq
-mavx512bw)

add_library(
simd_utils STATIC
${SIMD_UTILS_SRC} $<TARGET_OBJECTS:simd_utils_sse>
$<TARGET_OBJECTS:simd_utils_avx> $<TARGET_OBJECTS:simd_utils_avx512>)
# target_link_libraries(simd_utils PUBLIC glog::glog)
endif()

if(__AARCH64)
set(SIMD_UTILS_SRC ${PROJECT_SOURCE_DIR}/src/simd/hook.cc
${PROJECT_SOURCE_DIR}/src/simd/distances_ref.cc)
add_library(simd_utils STATIC ${SIMD_UTILS_SRC})
# target_link_libraries(simd_utils PUBLIC glog::glog)
endif()

if(CMAKE_BUILD_TYPE MATCHES "Debug")
set(SIMD_LIBRARIES
"${CMAKE_BINARY_DIR}/libsimd_utilsd.a"
CACHE FILEPATH "simd library." FORCE)
else()
set(SIMD_LIBRARIES
"${CMAKE_BINARY_DIR}/libsimd_utils.a"
CACHE FILEPATH "simd library." FORCE)
endif()

set(DEPEND_LIBS ${DEPEND_LIBS} simd_utils)

set(DYNAMIC_LIB ${DYNAMIC_LIB} ${SIMD_LIBRARIES})

# object file
add_library(DINGODB_OBJS
OBJECT
Expand Down
2 changes: 1 addition & 1 deletion contrib/hnswlib
199 changes: 199 additions & 0 deletions src/simd/distances_avx.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
// Copyright (c) 2023 dingodb.com, Inc. All Rights Reserved
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Copyright (C) 2019-2023 Zilliz. All rights reserved.

#if defined(__x86_64__)

#include "simd/distances_avx.h"

#include <immintrin.h>

#include <cassert>

namespace dingodb {

#define ALIGNED(x) __attribute__((aligned(x)))

// reads 0 <= d < 4 floats as __m128
static inline __m128 masked_read(int d, const float* x) {
assert(0 <= d && d < 4);
ALIGNED(16) float buf[4] = {0, 0, 0, 0};
switch (d) {
case 3:
buf[2] = x[2];
case 2:
buf[1] = x[1];
case 1:
buf[0] = x[0];
}
return _mm_load_ps(buf);
// cannot use AVX2 _mm_mask_set1_epi32
}

float fvec_inner_product_avx(const float* x, const float* y, size_t d) {
__m256 msum1 = _mm256_setzero_ps();

while (d >= 8) {
__m256 mx = _mm256_loadu_ps(x);
x += 8;
__m256 my = _mm256_loadu_ps(y);
y += 8;
msum1 = _mm256_add_ps(msum1, _mm256_mul_ps(mx, my));
d -= 8;
}

__m128 msum2 = _mm256_extractf128_ps(msum1, 1);
msum2 = _mm_add_ps(msum2, _mm256_extractf128_ps(msum1, 0));

if (d >= 4) {
__m128 mx = _mm_loadu_ps(x);
x += 4;
__m128 my = _mm_loadu_ps(y);
y += 4;
msum2 = _mm_add_ps(msum2, _mm_mul_ps(mx, my));
d -= 4;
}

if (d > 0) {
__m128 mx = masked_read(d, x);
__m128 my = masked_read(d, y);
msum2 = _mm_add_ps(msum2, _mm_mul_ps(mx, my));
}

msum2 = _mm_hadd_ps(msum2, msum2);
msum2 = _mm_hadd_ps(msum2, msum2);
return _mm_cvtss_f32(msum2);
}

float fvec_L2sqr_avx(const float* x, const float* y, size_t d) {
__m256 msum1 = _mm256_setzero_ps();

while (d >= 8) {
__m256 mx = _mm256_loadu_ps(x);
x += 8;
__m256 my = _mm256_loadu_ps(y);
y += 8;
const __m256 a_m_b1 = _mm256_sub_ps(mx, my);
msum1 = _mm256_add_ps(msum1, _mm256_mul_ps(a_m_b1, a_m_b1));
d -= 8;
}

__m128 msum2 = _mm256_extractf128_ps(msum1, 1);
msum2 = _mm_add_ps(msum2, _mm256_extractf128_ps(msum1, 0));

if (d >= 4) {
__m128 mx = _mm_loadu_ps(x);
x += 4;
__m128 my = _mm_loadu_ps(y);
y += 4;
const __m128 a_m_b1 = _mm_sub_ps(mx, my);
msum2 = _mm_add_ps(msum2, _mm_mul_ps(a_m_b1, a_m_b1));
d -= 4;
}

if (d > 0) {
__m128 mx = masked_read(d, x);
__m128 my = masked_read(d, y);
__m128 a_m_b1 = _mm_sub_ps(mx, my);
msum2 = _mm_add_ps(msum2, _mm_mul_ps(a_m_b1, a_m_b1));
}

msum2 = _mm_hadd_ps(msum2, msum2);
msum2 = _mm_hadd_ps(msum2, msum2);
return _mm_cvtss_f32(msum2);
}

float fvec_L1_avx(const float* x, const float* y, size_t d) {
__m256 msum1 = _mm256_setzero_ps();
__m256 signmask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffffUL));

while (d >= 8) {
__m256 mx = _mm256_loadu_ps(x);
x += 8;
__m256 my = _mm256_loadu_ps(y);
y += 8;
const __m256 a_m_b = _mm256_sub_ps(mx, my);
msum1 = _mm256_add_ps(msum1, _mm256_and_ps(signmask, a_m_b));
d -= 8;
}

__m128 msum2 = _mm256_extractf128_ps(msum1, 1);
msum2 = _mm_add_ps(msum2, _mm256_extractf128_ps(msum1, 0));
__m128 signmask2 = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffffUL));

if (d >= 4) {
__m128 mx = _mm_loadu_ps(x);
x += 4;
__m128 my = _mm_loadu_ps(y);
y += 4;
const __m128 a_m_b = _mm_sub_ps(mx, my);
msum2 = _mm_add_ps(msum2, _mm_and_ps(signmask2, a_m_b));
d -= 4;
}

if (d > 0) {
__m128 mx = masked_read(d, x);
__m128 my = masked_read(d, y);
__m128 a_m_b = _mm_sub_ps(mx, my);
msum2 = _mm_add_ps(msum2, _mm_and_ps(signmask2, a_m_b));
}

msum2 = _mm_hadd_ps(msum2, msum2);
msum2 = _mm_hadd_ps(msum2, msum2);
return _mm_cvtss_f32(msum2);
}

float fvec_Linf_avx(const float* x, const float* y, size_t d) {
__m256 msum1 = _mm256_setzero_ps();
__m256 signmask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffffUL));

while (d >= 8) {
__m256 mx = _mm256_loadu_ps(x);
x += 8;
__m256 my = _mm256_loadu_ps(y);
y += 8;
const __m256 a_m_b = _mm256_sub_ps(mx, my);
msum1 = _mm256_max_ps(msum1, _mm256_and_ps(signmask, a_m_b));
d -= 8;
}

__m128 msum2 = _mm256_extractf128_ps(msum1, 1);
msum2 = _mm_max_ps(msum2, _mm256_extractf128_ps(msum1, 0));
__m128 signmask2 = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffffUL));

if (d >= 4) {
__m128 mx = _mm_loadu_ps(x);
x += 4;
__m128 my = _mm_loadu_ps(y);
y += 4;
const __m128 a_m_b = _mm_sub_ps(mx, my);
msum2 = _mm_max_ps(msum2, _mm_and_ps(signmask2, a_m_b));
d -= 4;
}

if (d > 0) {
__m128 mx = masked_read(d, x);
__m128 my = masked_read(d, y);
__m128 a_m_b = _mm_sub_ps(mx, my);
msum2 = _mm_max_ps(msum2, _mm_and_ps(signmask2, a_m_b));
}

msum2 = _mm_max_ps(_mm_movehl_ps(msum2, msum2), msum2);
msum2 = _mm_max_ps(msum2, _mm_shuffle_ps(msum2, msum2, 1));
return _mm_cvtss_f32(msum2);
}

} // namespace dingodb
#endif
39 changes: 39 additions & 0 deletions src/simd/distances_avx.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright (c) 2023 dingodb.com, Inc. All Rights Reserved
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Copyright (C) 2019-2023 Zilliz. All rights reserved.

#ifndef DINGODB_SIMD_DISTANCES_AVX_H_
#define DINGODB_SIMD_DISTANCES_AVX_H_

#include <cstddef>
#include <cstdint>

namespace dingodb {

/// Squared L2 distance between two vectors
float fvec_L2sqr_avx(const float* x, const float* y, size_t d);

/// inner product
float fvec_inner_product_avx(const float* x, const float* y, size_t d);

/// L1 distance
float fvec_L1_avx(const float* x, const float* y, size_t d);

/// infinity distance
float fvec_Linf_avx(const float* x, const float* y, size_t d);

} // namespace dingodb

#endif // DINGODB_SIMD_DISTANCES_AVX_H_ //NOLINT
Loading

0 comments on commit c5d452e

Please sign in to comment.