Skip to content

Commit

Permalink
[feat][benchmark] Support vector query benchmark.
Browse files Browse the repository at this point in the history
  • Loading branch information
rock-git authored and ketor committed Apr 18, 2024
1 parent 43a7d4e commit f872578
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 27 deletions.
20 changes: 7 additions & 13 deletions src/benchmark/benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ DEFINE_uint32(vector_index_id, 0, "Vector index id");
DEFINE_string(vector_index_name, "", "Vector index name");

DEFINE_uint32(vector_search_topk, 10, "Vector search flag topk");
DEFINE_bool(vector_search_with_vector_data, true, "Vector search flag with_vector_data");
DEFINE_bool(vector_search_with_scalar_data, false, "Vector search flag with_scalar_data");
DEFINE_bool(vector_search_with_table_data, false, "Vector search flag with_table_data");
DEFINE_bool(with_vector_data, true, "Vector search flag with_vector_data");
DEFINE_bool(with_scalar_data, false, "Vector search flag with_scalar_data");
DEFINE_bool(with_table_data, false, "Vector search flag with_table_data");
DEFINE_bool(vector_search_use_brute_force, false, "Vector search flag use_brute_force");
DEFINE_bool(vector_search_enable_range_search, false, "Vector search flag enable_range_search");
DEFINE_double(vector_search_radius, 0.1, "Vector search flag radius");
Expand Down Expand Up @@ -150,7 +150,7 @@ static bool IsTransactionBenchmark() {

// Reports whether the benchmark selected through FLAGS_benchmark is one of the
// vector benchmarks: "fillvectorseq", "fillvectorrandom", "searchvector" or
// "queryvector".
static bool IsVectorBenchmark() {
  // All four names must be part of the same boolean expression; ending the
  // return statement after "searchvector" would leave the "queryvector"
  // comparison as an unreachable statement.
  return FLAGS_benchmark == "fillvectorseq" || FLAGS_benchmark == "fillvectorrandom" ||
         FLAGS_benchmark == "searchvector" || FLAGS_benchmark == "queryvector";
}

Stats::Stats() {
Expand Down Expand Up @@ -884,15 +884,9 @@ void Environment::PrintParam() {
std::cout << fmt::format("{:<34}: {:>32}", "ivf_nbits_per_idx", FLAGS_ivf_nbits_per_idx) << '\n';

std::cout << fmt::format("{:<34}: {:>32}", "vector_search_topk", FLAGS_vector_search_topk) << '\n';
std::cout << fmt::format("{:<34}: {:>32}", "vector_search_with_vector_data",
FLAGS_vector_search_with_vector_data ? "true" : "false")
<< '\n';
std::cout << fmt::format("{:<34}: {:>32}", "vector_search_with_scalar_data",
FLAGS_vector_search_with_scalar_data ? "true" : "false")
<< '\n';
std::cout << fmt::format("{:<34}: {:>32}", "vector_search_with_table_data",
FLAGS_vector_search_with_table_data ? "true" : "false")
<< '\n';
std::cout << fmt::format("{:<34}: {:>32}", "with_vector_data", FLAGS_with_vector_data ? "true" : "false") << '\n';
std::cout << fmt::format("{:<34}: {:>32}", "with_scalar_data", FLAGS_with_scalar_data ? "true" : "false") << '\n';
std::cout << fmt::format("{:<34}: {:>32}", "with_table_data", FLAGS_with_table_data ? "true" : "false") << '\n';
std::cout << fmt::format("{:<34}: {:>32}", "vector_search_use_brute_force",
FLAGS_vector_search_use_brute_force ? "true" : "false")
<< '\n';
Expand Down
1 change: 0 additions & 1 deletion src/benchmark/dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ class Dataset {

// A single test entry: a vector with its id plus a table of neighbors.
struct TestEntry {
// The vector and its id; the id is what the query benchmark requests.
sdk::VectorWithId vector_with_id;
// Neighbor table keyed by int64_t with float values — presumably
// neighbor vector id -> distance; TODO confirm against the dataset loader.
std::unordered_map<int64_t, float> neighbors;
};
using TestEntryPtr = std::shared_ptr<TestEntry>;
Expand Down
6 changes: 3 additions & 3 deletions src/benchmark/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ static std::string GetUsageMessage() {
message += "\n --vector_index_name vector index name, default()";
message += "\n --vector_search_arrange_data arrange data, default(true)";
message += "\n --vector_search_topk vector search flag topk, default(10)";
message += "\n --vector_search_with_vector_data vector search flag with_vector_data, default(true)";
message += "\n --vector_search_with_scalar_data vector search flag with_scalar_data, default(false)";
message += "\n --vector_search_with_table_data vector search flag with_table_data, default(false)";
message += "\n --with_vector_data vector search flag with_vector_data, default(true)";
message += "\n --with_scalar_data vector search flag with_scalar_data, default(false)";
message += "\n --with_table_data vector search flag with_table_data, default(false)";
message += "\n --vector_search_use_brute_force vector search flag use_brute_force, default(false)";
message += "\n --vector_search_enable_range_search vector search flag enable_range_search, default(false)";
message += "\n --vector_search_radius vector search flag radius, default(0.1)";
Expand Down
100 changes: 91 additions & 9 deletions src/benchmark/operation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ DEFINE_validator(txn_isolation_level, [](const char*, const std::string& value)
// vector search
DECLARE_string(vector_dataset);
DECLARE_uint32(vector_search_topk);
DECLARE_bool(vector_search_with_vector_data);
DECLARE_bool(vector_search_with_scalar_data);
DECLARE_bool(vector_search_with_table_data);
DECLARE_bool(with_vector_data);
DECLARE_bool(with_scalar_data);
DECLARE_bool(with_table_data);
DECLARE_bool(vector_search_use_brute_force);
DECLARE_bool(vector_search_enable_range_search);
DECLARE_double(vector_search_radius);
Expand Down Expand Up @@ -141,6 +141,10 @@ static OperationBuilderMap support_operations = {
[](std::shared_ptr<sdk::Client> client) -> OperationPtr {
return std::make_shared<VectorSearchOperation>(client);
}},
{"queryvector",
[](std::shared_ptr<sdk::Client> client) -> OperationPtr {
return std::make_shared<VectorQueryOperation>(client);
}},
};

static sdk::TransactionIsolation GetTxnIsolationLevel() {
Expand Down Expand Up @@ -560,6 +564,31 @@ Operation::Result BaseOperation::VectorSearch(VectorIndexEntryPtr entry,
return result;
}

// Runs one batch query against the vector index identified by entry->index_id.
//
// @param entry        Index entry; only entry->index_id is read here.
// @param query_param  Query parameters, including the vector ids to fetch.
// @return Result with:
//           - status: outcome of creating the vector client or of the query,
//           - write_bytes: number of requested ids * sizeof(int64_t),
//           - vector_query_result: filled by BatchQueryByIndexId,
//           - eplased_time: microseconds from just before client creation to
//             query completion. It is NOT set when client creation fails.
Operation::Result BaseOperation::VectorBatchQuery(VectorIndexEntryPtr entry, const sdk::QueryParam& query_param) {
  Operation::Result result;

  // Only the requested ids are accounted as the request payload.
  result.write_bytes = query_param.vector_ids.size() * sizeof(int64_t);

  const int64_t start_time = Helper::TimestampUs();

  sdk::VectorClient* raw_client = nullptr;
  result.status = client->NewVectorClient(&raw_client);
  if (!result.status.IsOK()) {
    return result;
  }

  // Own the client from here on so it is released on every exit path instead
  // of relying on a trailing manual delete.
  std::unique_ptr<sdk::VectorClient> vector_client(raw_client);

  result.status = vector_client->BatchQueryByIndexId(entry->index_id, query_param, result.vector_query_result);
  if (!result.status.IsOK()) {
    // A failed query is logged but still timed and returned to the caller.
    LOG(ERROR) << fmt::format("query vector failed, error: {}", result.status.ToString());
  }

  result.eplased_time = Helper::TimestampUs() - start_time;

  return result;
}

// Issues a single KvPut when FLAGS_batch_size is exactly 1; every other batch
// size goes through KvBatchPut. Both calls receive `false` as second argument.
Operation::Result FillSeqOperation::Execute(RegionEntryPtr region_entry) {
  if (FLAGS_batch_size == 1) {
    return KvPut(region_entry, false);
  }

  return KvBatchPut(region_entry, false);
}
Expand Down Expand Up @@ -1096,9 +1125,9 @@ Operation::Result VectorSearchOperation::ExecuteAutoData(VectorIndexEntryPtr ent
vector_with_ids.reserve(FLAGS_batch_size);

sdk::SearchParam search_param;
search_param.with_vector_data = FLAGS_vector_search_with_vector_data;
search_param.with_scalar_data = FLAGS_vector_search_with_scalar_data;
search_param.with_table_data = FLAGS_vector_search_with_table_data;
search_param.with_vector_data = FLAGS_with_vector_data;
search_param.with_scalar_data = FLAGS_with_scalar_data;
search_param.with_table_data = FLAGS_with_table_data;
search_param.use_brute_force = FLAGS_vector_search_use_brute_force;

if (FLAGS_vector_search_enable_range_search) {
Expand Down Expand Up @@ -1155,9 +1184,9 @@ Operation::Result VectorSearchOperation::ExecuteManualData(VectorIndexEntryPtr e
vector_with_ids.reserve(FLAGS_batch_size);

sdk::SearchParam search_param;
search_param.with_vector_data = FLAGS_vector_search_with_vector_data;
search_param.with_scalar_data = FLAGS_vector_search_with_scalar_data;
search_param.with_table_data = FLAGS_vector_search_with_table_data;
search_param.with_vector_data = FLAGS_with_vector_data;
search_param.with_scalar_data = FLAGS_with_scalar_data;
search_param.with_table_data = FLAGS_with_table_data;
search_param.use_brute_force = FLAGS_vector_search_use_brute_force;
search_param.topk = FLAGS_vector_search_topk;
search_param.extra_params.insert(std::make_pair(sdk::SearchExtraParamType::kEfSearch, FLAGS_vector_search_ef));
Expand Down Expand Up @@ -1218,6 +1247,59 @@ Operation::Result VectorSearchOperation::ExecuteManualData(VectorIndexEntryPtr e
return result;
}

// Entry point of the query benchmark: without a dataset (FLAGS_vector_dataset
// empty) ids are generated, otherwise they are taken from the test entries.
Operation::Result VectorQueryOperation::Execute(VectorIndexEntryPtr entry) {
  if (FLAGS_vector_dataset.empty()) {
    return ExecuteAutoData(entry);
  }

  return ExecuteManualData(entry);
}

// Queries vectors by ids produced with entry->GenId().
//
// The with_vector_data / with_scalar_data / with_table_data options come from
// the corresponding FLAGS_with_* flags. One id is generated when
// FLAGS_batch_size <= 1, otherwise FLAGS_batch_size ids are generated.
Operation::Result VectorQueryOperation::ExecuteAutoData(VectorIndexEntryPtr entry) {
  sdk::QueryParam query_param;
  query_param.with_vector_data = FLAGS_with_vector_data;
  query_param.with_scalar_data = FLAGS_with_scalar_data;
  query_param.with_table_data = FLAGS_with_table_data;

  std::vector<int64_t> generated_ids;
  generated_ids.reserve(FLAGS_batch_size);

  // The body runs at least once, which covers the batch_size <= 1 case; for
  // larger batches it runs exactly FLAGS_batch_size times.
  int produced = 0;
  do {
    generated_ids.push_back(entry->GenId());
    ++produced;
  } while (produced < FLAGS_batch_size);

  query_param.vector_ids = generated_ids;

  return VectorBatchQuery(entry, query_param);
}

// Queries vectors whose ids are taken from entry->test_entries.
//
// entry->GenId() supplies the starting offset into the test entries. One id is
// collected when FLAGS_batch_size <= 1, otherwise FLAGS_batch_size consecutive
// ids are collected, wrapping around at the end of the test entries. The
// with_* options come from the corresponding FLAGS_with_* flags.
//
// NOTE(review): the index is taken modulo entry->test_entries.size(), so this
// assumes the test entries are non-empty — confirm the dataset loading
// guarantees that before this operation runs.
Operation::Result VectorQueryOperation::ExecuteManualData(VectorIndexEntryPtr entry) {
  sdk::QueryParam query_param;
  query_param.with_vector_data = FLAGS_with_vector_data;
  query_param.with_scalar_data = FLAGS_with_scalar_data;
  query_param.with_table_data = FLAGS_with_table_data;

  const size_t offset = static_cast<size_t>(entry->GenId());
  auto& all_test_entries = entry->test_entries;
  const size_t entry_count = all_test_entries.size();

  const size_t id_count = FLAGS_batch_size <= 1 ? 1 : static_cast<size_t>(FLAGS_batch_size);

  std::vector<int64_t> vector_ids;
  vector_ids.reserve(id_count);

  // Count id_count entries starting AT offset. Using offset as the loop's
  // initial value with batch_size as its bound would shrink the batch by
  // offset and produce no ids at all once offset >= batch_size.
  for (size_t i = 0; i < id_count; ++i) {
    auto& test_entry = all_test_entries[(offset + i) % entry_count];
    vector_ids.push_back(test_entry->vector_with_id.id);
  }

  query_param.vector_ids = vector_ids;

  return VectorBatchQuery(entry, query_param);
}

bool IsSupportBenchmarkType(const std::string& benchmark) {
auto it = support_operations.find(benchmark);
return it != support_operations.end();
Expand Down
15 changes: 15 additions & 0 deletions src/benchmark/operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class Operation {

std::vector<uint32_t> recalls;
std::vector<sdk::SearchResult> vector_search_results;
sdk::QueryResult vector_query_result;
};

// Do some ready work at arrange stage
Expand Down Expand Up @@ -100,6 +101,8 @@ class BaseOperation : public Operation {
Result VectorSearch(VectorIndexEntryPtr entry, const std::vector<sdk::VectorWithId>& vector_with_ids,
const sdk::SearchParam& search_param);

Result VectorBatchQuery(VectorIndexEntryPtr entry, const sdk::QueryParam& query_param);

std::shared_ptr<sdk::Client> client;
std::shared_ptr<dingodb::sdk::RawKV> raw_kv;
};
Expand Down Expand Up @@ -250,6 +253,18 @@ class VectorSearchOperation : public BaseOperation {
Result ExecuteManualData(VectorIndexEntryPtr entry);
};

// Benchmark operation registered under the name "queryvector": fetches
// vectors by id instead of running a similarity search.
//
// Derives from VectorSearchOperation and overrides Execute(VectorIndexEntryPtr).
class VectorQueryOperation : public VectorSearchOperation {
 public:
  // Forwards the SDK client to the VectorSearchOperation base.
  VectorQueryOperation(std::shared_ptr<sdk::Client> client) : VectorSearchOperation(client) {}
  ~VectorQueryOperation() override = default;

  // Dispatches to ExecuteAutoData or ExecuteManualData.
  Result Execute(VectorIndexEntryPtr entry) override;

 private:
  // Query path that takes the vector ids from entry->test_entries.
  Result ExecuteManualData(VectorIndexEntryPtr entry);
  // Query path that generates the vector ids with entry->GenId().
  Result ExecuteAutoData(VectorIndexEntryPtr entry);
};

bool IsSupportBenchmarkType(const std::string& benchmark);
std::string GetSupportBenchmarkType();
OperationPtr NewOperation(std::shared_ptr<sdk::Client> client);
Expand Down
2 changes: 1 addition & 1 deletion src/vector/vector_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ namespace dingodb {

DEFINE_int64(vector_index_max_range_search_result_count, 1024, "max range search result count");
DEFINE_int64(vector_index_bruteforce_batch_count, 2048, "bruteforce batch count");
DEFINE_bool(dingo_log_switch_scalar_speed_up_detail, true, "scalar speed up log");
DEFINE_bool(dingo_log_switch_scalar_speed_up_detail, false, "scalar speed up log");

bvar::LatencyRecorder g_bruteforce_search_latency("dingo_bruteforce_search_latency");
bvar::LatencyRecorder g_bruteforce_range_search_latency("dingo_bruteforce_range_search_latency");
Expand Down

0 comments on commit f872578

Please sign in to comment.