Skip to content

Commit

Permalink
planner: don't calc the heavy expression used in ORDER BY stmt twice (#…
Browse files Browse the repository at this point in the history
…58208)

ref #54245, close #56318
  • Loading branch information
EricZequan authored Dec 23, 2024
1 parent 5fbe4fb commit 33f0727
Show file tree
Hide file tree
Showing 8 changed files with 939 additions and 245 deletions.
2 changes: 1 addition & 1 deletion pkg/expression/integration_test/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ go_test(
"main_test.go",
],
flaky = True,
shard_count = 48,
shard_count = 50,
deps = [
"//pkg/config",
"//pkg/domain",
Expand Down
209 changes: 203 additions & 6 deletions pkg/expression/integration_test/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,12 +339,11 @@ func TestVectorConstantExplain(t *testing.T) {
))
tk.MustQuery(`EXPLAIN format = 'brief' SELECT VEC_COSINE_DISTANCE(c, '[1,2,3,4,5,6,7,8,9,10,11]') AS d FROM t ORDER BY d LIMIT 10;`).Check(testkit.Rows(
"Projection 10.00 root vec_cosine_distance(test.t.c, [1,2,3,4,5,(6 more)...])->Column#3",
"└─Projection 10.00 root test.t.c",
" └─TopN 10.00 root Column#4, offset:0, count:10",
" └─Projection 10.00 root test.t.c, vec_cosine_distance(test.t.c, [1,2,3,4,5,(6 more)...])->Column#4",
" └─TableReader 10.00 root data:TopN",
" └─TopN 10.00 cop[tikv] vec_cosine_distance(test.t.c, [1,2,3,4,5,(6 more)...]), offset:0, count:10",
" └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo",
"└─TopN 10.00 root Column#4, offset:0, count:10",
" └─TableReader 10.00 root data:TopN",
" └─TopN 10.00 cop[tikv] Column#4, offset:0, count:10",
" └─Projection 10.00 cop[tikv] test.t.c, vec_cosine_distance(test.t.c, [1,2,3,4,5,(6 more)...])->Column#4",
" └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo",
))

// Prepare a large Vector string
Expand Down Expand Up @@ -849,6 +848,204 @@ func TestVectorMiscFunctions(t *testing.T) {
tk.MustQuery(`SELECT * FROM a;`).Check(testkit.Rows("1 [2,10,14] 3"))
}

func testVectorSearchInternal(tk *testkit.TestKit) {
tk.MustExec(`
create table t1 (
id int primary key,
vec vector(3),
a int,
b int,
c vector(3),
d vector,
VECTOR INDEX idx_embedding ((VEC_COSINE_DISTANCE(vec)))
)
`)
tk.MustExec(`
insert into t1 values
(1, '[1,1,1]', 11, 111, '[1,1,1]', '[1,1,1]'),
(2, '[2,2,2]', 22, 222, '[2,2,2]', '[2,2,2]'),
(3, '[3,3,3]', 33, 333, '[3,3,3]', '[3,3,3]');
`)
tk.MustExec("analyze table t1")

tk.MustQuery("select id from t1 order by id").Check(testkit.Rows(
"1",
"2",
"3",
))
tk.MustQuery("select id from t1 order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"3",
"2",
"1",
))
tk.MustQuery("select id from t1 order by vec_l2_distance(vec, '[3,3,3]') limit 1").Check(testkit.Rows(
"3",
))
tk.MustQuery("select id from t1 order by vec_l2_distance(vec, '[3,3,3]')").Check(testkit.Rows(
"3",
"2",
"1",
))
tk.MustQuery("select * from t1 order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"3 [3,3,3] 33 333 [3,3,3] [3,3,3]",
"2 [2,2,2] 22 222 [2,2,2] [2,2,2]",
"1 [1,1,1] 11 111 [1,1,1] [1,1,1]",
))
tk.MustQuery("select id, a, b from t1 order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"3 33 333",
"2 22 222",
"1 11 111",
))
tk.MustQuery("select a, id, b from t1 order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"33 3 333",
"22 2 222",
"11 1 111",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d from t1 order by d limit 10").Check(testkit.Rows(
"3 0",
"2 1.7320508075688772",
"1 3.4641016151377544",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d from t1 order by d").Check(testkit.Rows(
"3 0",
"2 1.7320508075688772",
"1 3.4641016151377544",
))
tk.MustQuery("select *, vec_l2_distance(vec, '[3,3,3]') as d from t1 order by d limit 10").Check(testkit.Rows(
"3 [3,3,3] 33 333 [3,3,3] [3,3,3] 0",
"2 [2,2,2] 22 222 [2,2,2] [2,2,2] 1.7320508075688772",
"1 [1,1,1] 11 111 [1,1,1] [1,1,1] 3.4641016151377544",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d, a, b from t1 order by d limit 10").Check(testkit.Rows(
"3 0 33 333",
"2 1.7320508075688772 22 222",
"1 3.4641016151377544 11 111",
))
tk.MustQuery("select id, a, b, vec_l2_distance(vec, '[3,3,3]') as d from t1 order by d limit 10").Check(testkit.Rows(
"3 33 333 0",
"2 22 222 1.7320508075688772",
"1 11 111 3.4641016151377544",
))

tk.MustExec(`
create table tp (
id int,
vec vector(3) comment 'hnsw(distance=cosine)',
a int, b int,
store_id int
) PARTITION BY RANGE COLUMNS(store_id) (
PARTITION p0 VALUES LESS THAN (100),
PARTITION p1 VALUES LESS THAN (200),
PARTITION p2 VALUES LESS THAN (MAXVALUE)
);
`)
tk.MustExec(`
insert into tp values
(1, '[1,1,1]', 11, 111, 50),
(2, '[2,2,2]', 22, 222, 150),
(3, '[3,3,3]', 33, 333, 250);
`)
tk.MustExec("analyze table tp")

tk.MustQuery("select id from tp order by id").Check(testkit.Rows(
"1",
"2",
"3",
))
tk.MustQuery("select id from tp order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"3",
"2",
"1",
))
tk.MustQuery("select id from tp order by vec_l2_distance(vec, '[3,3,3]')").Check(testkit.Rows(
"3",
"2",
"1",
))
tk.MustQuery("select * from tp order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"3 [3,3,3] 33 333 250",
"2 [2,2,2] 22 222 150",
"1 [1,1,1] 11 111 50",
))
tk.MustQuery("select id, a, b from tp order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"3 33 333",
"2 22 222",
"1 11 111",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d from tp order by d limit 10").Check(testkit.Rows(
"3 0",
"2 1.7320508075688772",
"1 3.4641016151377544",
))
tk.MustQuery("select *, vec_l2_distance(vec, '[3,3,3]') as d from tp order by d limit 10").Check(testkit.Rows(
"3 [3,3,3] 33 333 250 0",
"2 [2,2,2] 22 222 150 1.7320508075688772",
"1 [1,1,1] 11 111 50 3.4641016151377544",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d, a, b from tp order by d limit 10").Check(testkit.Rows(
"3 0 33 333",
"2 1.7320508075688772 22 222",
"1 3.4641016151377544 11 111",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d, a, b from tp order by d").Check(testkit.Rows(
"3 0 33 333",
"2 1.7320508075688772 22 222",
"1 3.4641016151377544 11 111",
))

tk.MustQuery("select id from tp partition (p0) order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"1",
))
tk.MustQuery("select * from tp partition (p0) order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"1 [1,1,1] 11 111 50",
))
tk.MustQuery("select id, a, b from tp partition (p0) order by vec_l2_distance(vec, '[3,3,3]') limit 10").Check(testkit.Rows(
"1 11 111",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d from tp partition (p0) order by d limit 10").Check(testkit.Rows(
"1 3.4641016151377544",
))
tk.MustQuery("select *, vec_l2_distance(vec, '[3,3,3]') as d from tp partition (p0) order by d limit 10").Check(testkit.Rows(
"1 [1,1,1] 11 111 50 3.4641016151377544",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d, a, b from tp partition (p0) order by d limit 10").Check(testkit.Rows(
"1 3.4641016151377544 11 111",
))
tk.MustQuery("select id, vec_l2_distance(vec, '[3,3,3]') as d, a, b from tp partition (p0) order by d").Check(testkit.Rows(
"1 3.4641016151377544 11 111",
))
}

func TestVectorSearchExtractProj(t *testing.T) {
{
store, _ := testkit.CreateMockStoreAndDomainWithSchemaLease(t, 200*time.Millisecond, mockstore.WithMockTiFlash(1))
tk := testkit.NewTestKit(t, store)
tk.MustExec("USE test;")
testVectorSearchInternal(tk)
}
{
store, _ := testkit.CreateMockStoreAndDomainWithSchemaLease(t, 200*time.Millisecond, mockstore.WithMockTiFlash(1))
tk := testkit.NewTestKit(t, store)
tk.MustExec("USE test;")
tk.MustExec("SET SESSION tidb_opt_fix_control = '56318:OFF';")
testVectorSearchInternal(tk)
}
}

func TestVectorSearchPreparedStatement(t *testing.T) {
store, _ := testkit.CreateMockStoreAndDomainWithSchemaLease(t, 200*time.Millisecond, mockstore.WithMockTiFlash(1))
tk := testkit.NewTestKit(t, store)
tk.MustExec("USE test;")
tk.MustExec("CREATE TABLE t1 (pk INT PRIMARY KEY, vec vector(3), VECTOR INDEX idx_embedding ((VEC_COSINE_DISTANCE(vec))) );")
tk.MustExec("INSERT INTO t1 VALUES (1, '[1,2,3]'), (2, '[4,5,6]'), (3, '[7,8,9]');")
tk.MustExec("ANALYZE TABLE t1;")

tk.MustExec("PREPARE stmt FROM 'SELECT pk FROM t1 ORDER BY vec_cosine_distance(vec, ?) LIMIT ?';")
tk.MustExec("SET @pvec = '[7,8,9]';")
tk.MustExec("SET @plimit = 10;")
tk.MustQuery("EXECUTE stmt USING @pvec, @plimit;").Check(testkit.Rows("3", "2", "1"))
}

func TestGetLock(t *testing.T) {
ctx := context.Background()
store := testkit.CreateMockStore(t, mockstore.WithStoreType(mockstore.EmbedUnistore))
Expand Down
2 changes: 1 addition & 1 deletion pkg/planner/core/casetest/vectorsearch/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ go_test(
],
data = glob(["testdata/**"]),
flaky = True,
shard_count = 5,
shard_count = 7,
deps = [
"//pkg/config",
"//pkg/domain",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,41 @@
"explain select * from t1 where store_id between 80 and 120 order by vec_cosine_distance(vec, '[1,1,1]') limit 1",
"explain select * from t1 partition (p0) order by vec_cosine_distance(vec, '[1,1,1]') limit 1"
]
},
{
"name": "TestVectorSearchWithPKAuto",
"cases": [
"explain select id from t1",

"explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",
"explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]')",
"explain select * from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",
"explain select id, a, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",
"explain select a, id, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",

"explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10",
"explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d",
"explain select *, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10",
"explain select id, vec_cosine_distance(vec, '[1,1,1]') as d, a, b from t1 order by d limit 10",
"explain select id, a, b, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10"
]
},
{
"name": "TestVectorSearchWithPKForceTiKV",
"cases": [
"explain select id from t1",

"explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",
"explain select id from t1 order by vec_cosine_distance(vec, '[1,1,1]')",
"explain select * from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",
"explain select id, a, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",
"explain select a, id, b from t1 order by vec_cosine_distance(vec, '[1,1,1]') limit 10",

"explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10",
"explain select id, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d",
"explain select *, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10",
"explain select id, vec_cosine_distance(vec, '[1,1,1]') as d, a, b from t1 order by d limit 10",
"explain select id, a, b, vec_cosine_distance(vec, '[1,1,1]') as d from t1 order by d limit 10"
]
}
]
Loading

0 comments on commit 33f0727

Please sign in to comment.