forked from lance-format/lance
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathquery.py
More file actions
245 lines (204 loc) · 6.89 KB
/
query.py
File metadata and controls
245 lines (204 loc) · 6.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The Lance Authors
import abc
from enum import Enum
from typing import Optional
from .lance import PyFullTextQuery
class FullTextQueryType(Enum):
    """
    Kinds of full-text query defined in this module.

    Each member is the value returned by ``query_type()`` on the
    corresponding query class.
    """

    MATCH = "match"  # returned by MatchQuery.query_type()
    MATCH_PHRASE = "match_phrase"  # returned by PhraseQuery.query_type()
    BOOST = "boost"  # returned by BoostQuery.query_type()
    MULTI_MATCH = "multi_match"  # returned by MultiMatchQuery.query_type()
    BOOLEAN = "boolean"  # returned by BooleanQuery.query_type()
class FullTextOperator(Enum):
    """
    Operator used to combine the individual terms of a match query.

    The member's ``value`` is the string passed to the native query
    constructors as ``operator``.
    """

    AND = "AND"  # all terms in the query must match
    OR = "OR"  # at least one term in the query must match
class Occur(Enum):
    """
    Occurrence requirement attached to each clause of a ``BooleanQuery``.

    The member's ``value`` is the string passed to the native boolean query
    constructor alongside the clause's query.
    """

    SHOULD = "SHOULD"  # clause kind produced by the `|` operator on queries
    MUST = "MUST"  # clause kind produced by the `&` operator on queries
    # NOTE(review): presumably excludes documents matching the clause;
    # the exact semantics live in the native implementation - confirm there.
    MUST_NOT = "MUST_NOT"
class FullTextQuery(abc.ABC):
    """
    Abstract base class for the full-text query wrappers in this module.

    Each concrete subclass builds a ``PyFullTextQuery`` in its constructor and
    stores it in ``_inner``. Two queries can be combined with ``&`` (every
    clause is ``Occur.MUST``) or ``|`` (every clause is ``Occur.SHOULD``),
    which yields a ``BooleanQuery``.
    """

    # Native query object; assigned by each subclass constructor.
    _inner: PyFullTextQuery

    @property
    def inner(self) -> PyFullTextQuery:
        """
        Get the inner query object.

        Returns
        -------
        PyFullTextQuery
            The inner query object.
        """
        return self._inner

    @abc.abstractmethod
    def query_type(self) -> FullTextQueryType:
        """
        Get the query type of the query.

        Returns
        -------
        FullTextQueryType
            The type of the query.
        """

    def __and__(self, other: "FullTextQuery") -> "FullTextQuery":
        """
        Combine two queries with a logical AND operation.

        Parameters
        ----------
        other : FullTextQuery
            The other query to combine with.

        Returns
        -------
        FullTextQuery
            A new query that combines both queries with AND.
            ``NotImplemented`` is returned when ``other`` is not a
            ``FullTextQuery``, so Python raises ``TypeError`` for
            unsupported operands.
        """
        # Follow the binary-operator protocol instead of failing later with
        # an AttributeError on ``other.inner``.
        if not isinstance(other, FullTextQuery):
            return NotImplemented
        return BooleanQuery([(Occur.MUST, self), (Occur.MUST, other)])

    def __or__(self, other: "FullTextQuery") -> "FullTextQuery":
        """
        Combine two queries with a logical OR operation.

        Parameters
        ----------
        other : FullTextQuery
            The other query to combine with.

        Returns
        -------
        FullTextQuery
            A new query that combines both queries with OR.
            ``NotImplemented`` is returned when ``other`` is not a
            ``FullTextQuery``, so Python raises ``TypeError`` for
            unsupported operands.
        """
        # Follow the binary-operator protocol instead of failing later with
        # an AttributeError on ``other.inner``.
        if not isinstance(other, FullTextQuery):
            return NotImplemented
        return BooleanQuery([(Occur.SHOULD, self), (Occur.SHOULD, other)])
class MatchQuery(FullTextQuery):
    """Full-text query that matches the terms of a query string in one column."""

    def __init__(
        self,
        query: str,
        column: str,
        *,
        boost: float = 1.0,
        fuzziness: Optional[int] = 0,
        max_expansions: int = 50,
        operator: FullTextOperator = FullTextOperator.OR,
        prefix_length: int = 0,
    ):
        """
        Match query for full-text search.

        Parameters
        ----------
        query : str
            The query string to match against.
        column : str
            The name of the column to match against.
        boost : float, default 1.0
            The boost factor for the query.
            The score of each matching document is multiplied by this value.
        fuzziness : int or None, default 0
            The maximum edit distance for each term in the match query.
            Defaults to 0 (exact match).
            If None, fuzziness is applied automatically by the rules:
                - 0 for terms with length <= 2
                - 1 for terms with length <= 5
                - 2 for terms with length > 5
        max_expansions : int, default 50
            The maximum number of terms to consider for fuzzy matching.
        operator : FullTextOperator, default OR
            The operator to use for combining the query results.
            Can be either `AND` or `OR`.
            If `AND`, all terms in the query must match.
            If `OR`, at least one term in the query must match.
        prefix_length : int, default 0
            The number of beginning characters being unchanged for fuzzy matching.
            This is useful to achieve prefix matching.
        """
        # The native constructor takes the operator as its string value,
        # not as the enum member.
        self._inner = PyFullTextQuery.match_query(
            query,
            column,
            boost=boost,
            fuzziness=fuzziness,
            max_expansions=max_expansions,
            operator=operator.value,
            prefix_length=prefix_length,
        )

    def query_type(self) -> FullTextQueryType:
        """Return ``FullTextQueryType.MATCH``."""
        return FullTextQueryType.MATCH
class PhraseQuery(FullTextQuery):
    """Full-text query that matches a phrase in one column."""

    def __init__(self, query: str, column: str, *, slop: int = 0):
        """
        Phrase query for full-text search.

        Parameters
        ----------
        query : str
            The query string to match against.
        column : str
            The name of the column to match against.
        slop : int, default 0
            Forwarded unchanged to the native phrase query constructor.
            NOTE(review): presumably the number of positions the phrase
            terms may be apart and still match - confirm against the
            native implementation.
        """
        build_phrase = PyFullTextQuery.phrase_query
        self._inner = build_phrase(query, column, slop)

    def query_type(self) -> FullTextQueryType:
        """Return ``FullTextQueryType.MATCH_PHRASE``."""
        return FullTextQueryType.MATCH_PHRASE
class BoostQuery(FullTextQuery):
    """Full-text query built from a positive and a negative sub-query."""

    def __init__(
        self,
        positive: FullTextQuery,
        negative: FullTextQuery,
        *,
        negative_boost: float = 0.5,
    ):
        """
        Boost query for full-text search.

        Parameters
        ----------
        positive : FullTextQuery
            The positive query object.
        negative : FullTextQuery
            The negative query object.
        negative_boost : float, default 0.5
            The boost factor for the negative query.
        """
        build_boost = PyFullTextQuery.boost_query
        # The native constructor works on the wrapped native queries, so
        # unwrap both operands before passing them on.
        self._inner = build_boost(positive.inner, negative.inner, negative_boost)

    def query_type(self) -> FullTextQueryType:
        """Return ``FullTextQueryType.BOOST``."""
        return FullTextQueryType.BOOST
class MultiMatchQuery(FullTextQuery):
    """Full-text query that matches one query string against several columns."""

    def __init__(
        self,
        query: str,
        columns: list[str],
        *,
        boosts: Optional[list[float]] = None,
        operator: FullTextOperator = FullTextOperator.OR,
    ):
        """
        Multi-match query for full-text search.

        Parameters
        ----------
        query : str
            The query string to match against.
        columns : list[str]
            The list of columns to match against.
        boosts : list[float], optional
            The list of boost factors for each column. If not provided,
            all columns will have the same boost factor.
        operator : FullTextOperator, default OR
            The operator to use for combining the query results.
            Can be either `AND` or `OR`.
            It would be applied to all columns individually.
            For example, if the operator is `AND`,
            then the query "hello world" is equal to
            `match("hello AND world", column1) OR match("hello AND world", column2)`.
        """
        build_multi_match = PyFullTextQuery.multi_match_query
        # The native constructor takes the operator as its string value.
        self._inner = build_multi_match(
            query,
            columns,
            boosts=boosts,
            operator=operator.value,
        )

    def query_type(self) -> FullTextQueryType:
        """Return ``FullTextQueryType.MULTI_MATCH``."""
        return FullTextQueryType.MULTI_MATCH
class BooleanQuery(FullTextQuery):
    """Full-text query composed of sub-queries, each tagged with an ``Occur``."""

    def __init__(self, queries: list[tuple[Occur, FullTextQuery]]):
        """
        Boolean query for full-text search.

        Parameters
        ----------
        queries : list[tuple(Occur, FullTextQuery)]
            The list of queries with their occurrence requirements.
        """
        build_boolean = PyFullTextQuery.boolean_query
        # Translate each (Occur, FullTextQuery) pair into the
        # (occurrence string, native query) pair the native constructor takes.
        clauses = []
        for occur, sub_query in queries:
            clauses.append((occur.value, sub_query.inner))
        self._inner = build_boolean(clauses)

    def query_type(self) -> FullTextQueryType:
        """Return ``FullTextQueryType.BOOLEAN``."""
        return FullTextQueryType.BOOLEAN