Skip to content

Commit 4082462

Browse files
sevdogauvipy
authored andcommitted
Align SearchFilter behaviour to django.contrib.admin search (encode#9017)
* Use subquery to remove duplicates in SearchFilter * Align SearchFilter behaviour to django.contrib.admin * Add compatibility with older django/python versions * Allow search to split also by comma after smart split * Use generator to build search conditions to reduce iterations * Improve search documentation * Update docs/api-guide/filtering.md --------- Co-authored-by: Asif Saif Uddin <[email protected]>
1 parent 6337477 commit 4082462

File tree

4 files changed

+146
-40
lines changed

4 files changed

+146
-40
lines changed

docs/api-guide/filtering.md

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -213,19 +213,23 @@ This will allow the client to filter the items in the list by making queries suc
213213
You can also perform a related lookup on a ForeignKey or ManyToManyField with the lookup API double-underscore notation:
214214

215215
search_fields = ['username', 'email', 'profile__profession']
216-
216+
217217
For [JSONField][JSONField] and [HStoreField][HStoreField] fields you can filter based on nested values within the data structure using the same double-underscore notation:
218218

219219
search_fields = ['data__breed', 'data__owner__other_pets__0__name']
220220

221-
By default, searches will use case-insensitive partial matches. The search parameter may contain multiple search terms, which should be whitespace and/or comma separated. If multiple search terms are used then objects will be returned in the list only if all the provided terms are matched.
221+
By default, searches will use case-insensitive partial matches. The search parameter may contain multiple search terms, which should be whitespace and/or comma separated. If multiple search terms are used then objects will be returned in the list only if all the provided terms are matched. Searches may contain _quoted phrases_ with spaces, each phrase is considered as a single search term.
222+
222223

223-
The search behavior may be restricted by prepending various characters to the `search_fields`.
224+
The search behavior may be specified by prefixing field names in `search_fields` with one of the following characters (which is equivalent to adding `__<lookup>` to the field):
224225

225-
* '^' Starts-with search.
226-
* '=' Exact matches.
227-
* '@' Full-text search. (Currently only supported Django's [PostgreSQL backend][postgres-search].)
228-
* '$' Regex search.
226+
| Prefix | Lookup | |
227+
| ------ | --------------| ------------------ |
228+
| `^` | `istartswith` | Starts-with search.|
229+
| `=` | `iexact` | Exact matches. |
230+
| `$` | `iregex` | Regex search. |
231+
| `@` | `search` | Full-text search (Currently only supported Django's [PostgreSQL backend][postgres-search]). |
232+
| None | `icontains` | Contains search (Default). |
229233

230234
For example:
231235

rest_framework/compat.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
versions of Django/Python, and compatibility wrappers around optional packages.
44
"""
55
import django
6-
from django.conf import settings
76
from django.views.generic import View
87

98

@@ -14,13 +13,6 @@ def unicode_http_header(value):
1413
return value
1514

1615

17-
def distinct(queryset, base):
18-
if settings.DATABASES[queryset.db]["ENGINE"] == "django.db.backends.oracle":
19-
# distinct analogue for Oracle users
20-
return base.filter(pk__in=set(queryset.values_list('pk', flat=True)))
21-
return queryset.distinct()
22-
23-
2416
# django.contrib.postgres requires psycopg2
2517
try:
2618
from django.contrib.postgres import fields as postgres_fields

rest_framework/filters.py

Lines changed: 62 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,35 @@
66
import warnings
77
from functools import reduce
88

9-
from django.core.exceptions import ImproperlyConfigured
9+
from django.core.exceptions import FieldDoesNotExist, ImproperlyConfigured
1010
from django.db import models
1111
from django.db.models.constants import LOOKUP_SEP
1212
from django.template import loader
1313
from django.utils.encoding import force_str
14+
from django.utils.text import smart_split, unescape_string_literal
1415
from django.utils.translation import gettext_lazy as _
1516

1617
from rest_framework import RemovedInDRF317Warning
17-
from rest_framework.compat import coreapi, coreschema, distinct
18+
from rest_framework.compat import coreapi, coreschema
19+
from rest_framework.fields import CharField
1820
from rest_framework.settings import api_settings
1921

2022

23+
def search_smart_split(search_terms):
24+
"""generator that first splits string by spaces, leaving quoted phrases togheter,
25+
then it splits non-quoted phrases by commas.
26+
"""
27+
for term in smart_split(search_terms):
28+
# trim commas to avoid bad matching for quoted phrases
29+
term = term.strip(',')
30+
if term.startswith(('"', "'")) and term[0] == term[-1]:
31+
# quoted phrases are kept togheter without any other split
32+
yield unescape_string_literal(term)
33+
else:
34+
# non-quoted tokens are split by comma, keeping only non-empty ones
35+
yield from (sub_term.strip() for sub_term in term.split(',') if sub_term)
36+
37+
2138
class BaseFilterBackend:
2239
"""
2340
A base class from which all filter backend classes should inherit.
@@ -64,18 +81,41 @@ def get_search_fields(self, view, request):
6481
def get_search_terms(self, request):
6582
"""
6683
Search terms are set by a ?search=... query parameter,
67-
and may be comma and/or whitespace delimited.
84+
and may be whitespace delimited.
6885
"""
69-
params = request.query_params.get(self.search_param, '')
70-
params = params.replace('\x00', '') # strip null characters
71-
params = params.replace(',', ' ')
72-
return params.split()
86+
value = request.query_params.get(self.search_param, '')
87+
field = CharField(trim_whitespace=False, allow_blank=True)
88+
return field.run_validation(value)
7389

74-
def construct_search(self, field_name):
90+
def construct_search(self, field_name, queryset):
7591
lookup = self.lookup_prefixes.get(field_name[0])
7692
if lookup:
7793
field_name = field_name[1:]
7894
else:
95+
# Use field_name if it includes a lookup.
96+
opts = queryset.model._meta
97+
lookup_fields = field_name.split(LOOKUP_SEP)
98+
# Go through the fields, following all relations.
99+
prev_field = None
100+
for path_part in lookup_fields:
101+
if path_part == "pk":
102+
path_part = opts.pk.name
103+
try:
104+
field = opts.get_field(path_part)
105+
except FieldDoesNotExist:
106+
# Use valid query lookups.
107+
if prev_field and prev_field.get_lookup(path_part):
108+
return field_name
109+
else:
110+
prev_field = field
111+
if hasattr(field, "path_infos"):
112+
# Update opts to follow the relation.
113+
opts = field.path_infos[-1].to_opts
114+
# django < 4.1
115+
elif hasattr(field, 'get_path_info'):
116+
# Update opts to follow the relation.
117+
opts = field.get_path_info()[-1].to_opts
118+
# Otherwise, use the field with icontains.
79119
lookup = 'icontains'
80120
return LOOKUP_SEP.join([field_name, lookup])
81121

@@ -113,26 +153,27 @@ def filter_queryset(self, request, queryset, view):
113153
return queryset
114154

115155
orm_lookups = [
116-
self.construct_search(str(search_field))
156+
self.construct_search(str(search_field), queryset)
117157
for search_field in search_fields
118158
]
119159

120160
base = queryset
121-
conditions = []
122-
for search_term in search_terms:
123-
queries = [
124-
models.Q(**{orm_lookup: search_term})
125-
for orm_lookup in orm_lookups
126-
]
127-
conditions.append(reduce(operator.or_, queries))
161+
# generator which for each term builds the corresponding search
162+
conditions = (
163+
reduce(
164+
operator.or_,
165+
(models.Q(**{orm_lookup: term}) for orm_lookup in orm_lookups)
166+
) for term in search_smart_split(search_terms)
167+
)
128168
queryset = queryset.filter(reduce(operator.and_, conditions))
129169

170+
# Remove duplicates from results, if necessary
130171
if self.must_call_distinct(queryset, search_fields):
131-
# Filtering against a many-to-many field requires us to
132-
# call queryset.distinct() in order to avoid duplicate items
133-
# in the resulting queryset.
134-
# We try to avoid this if possible, for performance reasons.
135-
queryset = distinct(queryset, base)
172+
# inspired by django.contrib.admin
173+
# this is more accurate than .distinct form M2M relationship
174+
# also is cross-database
175+
queryset = queryset.filter(pk=models.OuterRef('pk'))
176+
queryset = base.filter(models.Exists(queryset))
136177
return queryset
137178

138179
def to_html(self, request, queryset, view):

tests/test_filters.py

Lines changed: 73 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,36 @@
66
from django.db import models
77
from django.db.models import CharField, Transform
88
from django.db.models.functions import Concat, Upper
9-
from django.test import TestCase
9+
from django.test import SimpleTestCase, TestCase
1010
from django.test.utils import override_settings
1111

1212
from rest_framework import filters, generics, serializers
1313
from rest_framework.compat import coreschema
14+
from rest_framework.exceptions import ValidationError
1415
from rest_framework.test import APIRequestFactory
1516

1617
factory = APIRequestFactory()
1718

1819

20+
class SearchSplitTests(SimpleTestCase):
21+
22+
def test_keep_quoted_togheter_regardless_of_commas(self):
23+
assert ['hello, world'] == list(filters.search_smart_split('"hello, world"'))
24+
25+
def test_strips_commas_around_quoted(self):
26+
assert ['hello, world'] == list(filters.search_smart_split(',,"hello, world"'))
27+
assert ['hello, world'] == list(filters.search_smart_split(',,"hello, world",,'))
28+
assert ['hello, world'] == list(filters.search_smart_split('"hello, world",,'))
29+
30+
def test_splits_by_comma(self):
31+
assert ['hello', 'world'] == list(filters.search_smart_split(',,hello, world'))
32+
assert ['hello', 'world'] == list(filters.search_smart_split(',,hello, world,,'))
33+
assert ['hello', 'world'] == list(filters.search_smart_split('hello, world,,'))
34+
35+
def test_splits_quotes_followed_by_comma_and_sentence(self):
36+
assert ['"hello', 'world"', 'found'] == list(filters.search_smart_split('"hello, world",found'))
37+
38+
1939
class BaseFilterTests(TestCase):
2040
def setUp(self):
2141
self.original_coreapi = filters.coreapi
@@ -50,7 +70,8 @@ class Meta:
5070

5171

5272
class SearchFilterTests(TestCase):
53-
def setUp(self):
73+
@classmethod
74+
def setUpTestData(cls):
5475
# Sequence of title/text is:
5576
#
5677
# z abc
@@ -66,6 +87,9 @@ def setUp(self):
6687
)
6788
SearchFilterModel(title=title, text=text).save()
6889

90+
SearchFilterModel(title='A title', text='The long text').save()
91+
SearchFilterModel(title='The title', text='The "text').save()
92+
6993
def test_search(self):
7094
class SearchListView(generics.ListAPIView):
7195
queryset = SearchFilterModel.objects.all()
@@ -186,9 +210,21 @@ def test_search_field_with_null_characters(self):
186210
request = factory.get('/?search=\0as%00d\x00f')
187211
request = view.initialize_request(request)
188212

189-
terms = filters.SearchFilter().get_search_terms(request)
213+
with self.assertRaises(ValidationError):
214+
filters.SearchFilter().get_search_terms(request)
190215

191-
assert terms == ['asdf']
216+
def test_search_field_with_custom_lookup(self):
217+
class SearchListView(generics.ListAPIView):
218+
queryset = SearchFilterModel.objects.all()
219+
serializer_class = SearchFilterSerializer
220+
filter_backends = (filters.SearchFilter,)
221+
search_fields = ('text__iendswith',)
222+
view = SearchListView.as_view()
223+
request = factory.get('/', {'search': 'c'})
224+
response = view(request)
225+
assert response.data == [
226+
{'id': 1, 'title': 'z', 'text': 'abc'},
227+
]
192228

193229
def test_search_field_with_additional_transforms(self):
194230
from django.test.utils import register_lookup
@@ -242,6 +278,32 @@ class SearchListView(generics.ListAPIView):
242278
)
243279
assert search_query in rendered_search_field
244280

281+
def test_search_field_with_escapes(self):
282+
class SearchListView(generics.ListAPIView):
283+
queryset = SearchFilterModel.objects.all()
284+
serializer_class = SearchFilterSerializer
285+
filter_backends = (filters.SearchFilter,)
286+
search_fields = ('title', 'text',)
287+
view = SearchListView.as_view()
288+
request = factory.get('/', {'search': '"\\\"text"'})
289+
response = view(request)
290+
assert response.data == [
291+
{'id': 12, 'title': 'The title', 'text': 'The "text'},
292+
]
293+
294+
def test_search_field_with_quotes(self):
295+
class SearchListView(generics.ListAPIView):
296+
queryset = SearchFilterModel.objects.all()
297+
serializer_class = SearchFilterSerializer
298+
filter_backends = (filters.SearchFilter,)
299+
search_fields = ('title', 'text',)
300+
view = SearchListView.as_view()
301+
request = factory.get('/', {'search': '"long text"'})
302+
response = view(request)
303+
assert response.data == [
304+
{'id': 11, 'title': 'A title', 'text': 'The long text'},
305+
]
306+
245307

246308
class AttributeModel(models.Model):
247309
label = models.CharField(max_length=32)
@@ -284,6 +346,13 @@ def test_must_call_distinct_restores_meta_for_each_field(self):
284346
["%sattribute__label" % prefix, "%stitle" % prefix]
285347
)
286348

349+
def test_custom_lookup_to_related_model(self):
350+
# In this test case the attribute of the fk model comes first in the
351+
# list of search fields.
352+
filter_ = filters.SearchFilter()
353+
assert 'attribute__label__icontains' == filter_.construct_search('attribute__label', SearchFilterModelFk._meta)
354+
assert 'attribute__label__iendswith' == filter_.construct_search('attribute__label__iendswith', SearchFilterModelFk._meta)
355+
287356

288357
class SearchFilterModelM2M(models.Model):
289358
title = models.CharField(max_length=20)

0 commit comments

Comments
 (0)