Skip to content

Commit

Permalink
Adjust the scan range considering the number of rows
Browse files Browse the repository at this point in the history
  • Loading branch information
takaebato committed Dec 16, 2024
1 parent 83b8efd commit 537d4e1
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 6 deletions.
17 changes: 15 additions & 2 deletions python/python/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,9 +486,11 @@ def test_limit_offset(tmp_path: Path, data_storage_version: str):

# test just limit
assert dataset.to_table(limit=10) == table.slice(0, 10)
assert dataset.to_table(limit=100) == table.slice(0, 100)

# test just offset
assert dataset.to_table(offset=10) == table.slice(10, 100)
assert dataset.to_table(offset=0) == table.slice(0, 100)
assert dataset.to_table(offset=10) == table.slice(10, 90)

# test both
assert dataset.to_table(offset=10, limit=10) == table.slice(10, 10)
Expand All @@ -503,7 +505,18 @@ def test_limit_offset(tmp_path: Path, data_storage_version: str):
assert dataset.to_table(offset=50, limit=25) == table.slice(50, 25)

# Limit past the end
assert dataset.to_table(offset=50, limit=100) == table.slice(50, 50)
assert dataset.to_table(limit=101) == table.slice(0, 100)

# Limit with offset past the end
assert dataset.to_table(offset=50, limit=51) == table.slice(50, 50)

# Offset past the end
assert dataset.to_table(offset=100) == table.slice(100, 0) # Empty table
assert dataset.to_table(offset=101) == table.slice(100, 0) # Empty table

# Offset with limit past the end
assert dataset.to_table(offset=100, limit=1) == table.slice(100, 0) # Empty table
assert dataset.to_table(offset=101, limit=1) == table.slice(100, 0) # Empty table

# Invalid limit / offset
with pytest.raises(ValueError, match="Offset must be non-negative"):
Expand Down
14 changes: 10 additions & 4 deletions rust/lance/src/dataset/scanner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1219,12 +1219,18 @@ impl Scanner {
} else {
match (self.limit, self.offset) {
(None, None) => None,
(Some(limit), None) => Some(0..limit as u64),
(Some(limit), None) => {
let num_rows = self.dataset.count_all_rows().await? as i64;
Some(0..limit.min(num_rows) as u64)
}
(None, Some(offset)) => {
let num_rows = self.dataset.count_all_rows().await?;
Some(offset as u64..num_rows as u64)
let num_rows = self.dataset.count_all_rows().await? as i64;
Some(offset.min(num_rows) as u64..num_rows as u64)
}
(Some(limit), Some(offset)) => {
let num_rows = self.dataset.count_all_rows().await? as i64;
Some(offset.min(num_rows) as u64..(offset + limit).min(num_rows) as u64)
}
(Some(limit), Some(offset)) => Some(offset as u64..(offset + limit) as u64),
}
};
let mut use_limit_node = true;
Expand Down

0 comments on commit 537d4e1

Please sign in to comment.