Skip to content

Commit

Permalink
add docstr for filter function (#177)
Browse files Browse the repository at this point in the history
* docs(filter): init docstr

* fix: add regex filter test
  • Loading branch information
numb3r3 authored Mar 8, 2022
1 parent fbde062 commit a045b37
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 1 deletion.
29 changes: 29 additions & 0 deletions docarray/array/mixins/find.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,35 @@ def _filter(
limit: Optional[int] = None,
only_id: bool = False,
) -> 'DocumentArray':
"""Returns a subset of documents by filtering by the given query.
The query language currently follows the
[MongoDB](https://docs.mongodb.com/manual/reference/operator/query/) query language. For example::
>>> docs._filter({'text': {'$eq': 'hello'}})
The above will return a `DocumentArray` in which each document has doc.text == 'hello'. A placeholder format is
also supported, using the following syntax::
>>> docs._filter({'text': {'$eq': '{tags__name}'}})
will return a `DocumentArray` in which each document has doc.text == doc.tags['name'].
Currently, only a subset of MongoDB's query operators is supported:
- `$eq` - Equal to (number, string)
- `$ne` - Not equal to (number, string)
- `$gt` - Greater than (number)
- `$gte` - Greater than or equal to (number)
- `$lt` - Less than (number)
- `$lte` - Less than or equal to (number)
- `$in` - Included in an array
- `$nin` - Not included in an array
- `$regex` - Match a specified regular expression
:param query: the input query dictionary.
:param limit: the maximum number of matches; defaults to ``None`` when not given.
:param only_id: if set, the returned documents will only contain ``id``.
:return: a `DocumentArray` containing the `Document` objects that match the query.
"""
from ... import DocumentArray
from ..queryset import QueryParser

Expand Down
10 changes: 9 additions & 1 deletion tests/unit/array/mixins/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def test_empty_filter(docs):
assert len(result) == 5


def test_sample_filter(docs):
def test_simple_filter(docs):
result = docs.find({'text': {'$eq': 'hello'}})
assert len(result) == 1
assert result[0].text == 'hello'
Expand All @@ -31,6 +31,14 @@ def test_sample_filter(docs):
assert len(result) == 1
assert result[0].tags['x'] == 0.8

result = docs.find({'tags__name': {'$regex': '^h'}})
assert len(result) == 2
assert result[1].id == docs[1].id

result = docs.find({'text': {'$regex': '^h'}})
assert len(result) == 1
assert result[0].id == docs[0].id


def test_logic_filter(docs):
result = docs.find({'$or': {'tags__x': {'$gte': 0.1}, 'tags__y': {'$gte': 0.5}}})
Expand Down
3 changes: 3 additions & 0 deletions tests/unit/array/test_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ def test_lookup_ops(doc):
assert lookup('tags__x__lte', 0.1, doc)
assert not lookup('tags__y__lt', 1.5, doc)

assert lookup('text__regex', '^test', doc)
assert not lookup('text__regex', '^est', doc)


def test_lookup_pl(doc):
from docarray.array.queryset.lookup import lookup
Expand Down

0 comments on commit a045b37

Please sign in to comment.