Skip to content

Commit

Permalink
[fix][index] Accelerate vector index initial build.
Browse files Browse the repository at this point in the history
Signed-off-by: Ketor <[email protected]>
  • Loading branch information
ketor authored and rock-git committed Dec 1, 2023
1 parent 530eff9 commit 73a20ee
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 9 deletions.
2 changes: 1 addition & 1 deletion src/common/constant.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ class Constant {

static const uint32_t kLoadOrBuildVectorIndexConcurrency = 5;

static const uint32_t kBuildVectorIndexBatchSize = 8192;
static const uint32_t kBuildVectorIndexBatchSize = 32768;

static constexpr int32_t kCreateIvfFlatParamNcentroids = 2048;
static constexpr int32_t kSearchIvfFlatParamNprobe = 80;
Expand Down
16 changes: 8 additions & 8 deletions src/vector/vector_index_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,9 @@ VectorIndexPtr VectorIndexManager::BuildVectorIndex(VectorIndexWrapperPtr vector

auto raw_engine = Server::GetInstance().GetRawEngine(region->GetRawEngineType());
auto iter = raw_engine->Reader()->NewIterator(Constant::kVectorDataCF, options);
if (iter == nullptr) {
DINGO_LOG(FATAL) << fmt::format("[vector_index.build][index_id({})] NewIterator failed.", vector_index_id);
}

// Note: This is iterated 2 times for the following reasons:
// ivf_flat must train first before adding data
Expand Down Expand Up @@ -659,28 +662,25 @@ VectorIndexPtr VectorIndexManager::BuildVectorIndex(VectorIndexWrapperPtr vector
if ((count + 1) % Constant::kBuildVectorIndexBatchSize == 0) {
int64_t upsert_start_time = Helper::TimestampMs();

vector_index->Upsert(vectors);
vector_index->Add(vectors);

upsert_use_time += (Helper::TimestampMs() - upsert_start_time);
vectors.clear();

// yield, for other bthread run.
bthread_yield();
}

// Print build progress
if ((count + 1) % (Constant::kBuildVectorIndexBatchSize * 10) == 0) {
DINGO_LOG(INFO) << fmt::format(
"[vector_index.build][index_id({})][trace({})] Build vector index progress, parallel({}) count({}) elapsed "
"time({}/{}ms)",
vector_index_id, trace, vector_index->WriteOpParallelNum(), count, upsert_use_time,
Helper::TimestampMs() - start_time);

// yield, for other bthread run.
bthread_yield();
}
}

if (!vectors.empty()) {
int64_t upsert_start_time = Helper::TimestampMs();
vector_index->Upsert(vectors);
vector_index->Add(vectors);
upsert_use_time += (Helper::TimestampMs() - upsert_start_time);
}

Expand Down
6 changes: 6 additions & 0 deletions src/vector/vector_index_snapshot_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -829,6 +829,8 @@ std::shared_ptr<VectorIndex> VectorIndexSnapshotManager::LoadVectorIndexSnapshot
VectorIndexWrapperPtr vector_index_wrapper, const pb::common::RegionEpoch& epoch) {
assert(vector_index_wrapper != nullptr);

int64_t start_time_ms = Helper::TimestampMs();

int64_t vector_index_id = vector_index_wrapper->Id();
auto snapshot_set = vector_index_wrapper->SnapshotSet();

Expand Down Expand Up @@ -893,6 +895,10 @@ std::shared_ptr<VectorIndex> VectorIndexSnapshotManager::LoadVectorIndexSnapshot
vector_index->SetSnapshotLogId(last_snapshot->SnapshotLogId());
vector_index->SetApplyLogId(last_snapshot->SnapshotLogId());

DINGO_LOG(INFO) << fmt::format(
"[vector_index.load_snapshot][index_id({})] Load vector index snapshot snapshot_{:020} elapsed time {}ms",
vector_index_id, last_snapshot->SnapshotLogId(), Helper::TimestampMs() - start_time_ms);

return vector_index;
}

Expand Down

0 comments on commit 73a20ee

Please sign in to comment.