4
4
import time
5
5
import meilisearch
6
6
from builtins import range
7
+ import pyhash
7
8
8
9
def clean_one_field (value ):
9
10
if value is None :
@@ -31,13 +32,16 @@ def parse_record(record):
31
32
for k , v in record ['hierarchy_radio' ].items ():
32
33
key = 'hierarchy_radio_' + k
33
34
new_hierarchy_radio = {** {key : v }, ** new_hierarchy_radio }
35
+ hasher = pyhash .fnv1_32 ()
36
+ new_record_url_hash = {}
37
+ new_record_url_hash ['url_hash' ] = hasher (record ['url' ])
34
38
del record ['weight' ]
35
39
del record ['hierarchy' ]
36
40
del record ['hierarchy_radio' ]
37
41
del record ['hierarchy_camel' ]
38
42
del record ['hierarchy_radio_camel' ]
39
43
del record ['content_camel' ]
40
- return {** record , ** new_weight , ** new_hierarchy , ** new_hierarchy_radio }
44
+ return {** record , ** new_weight , ** new_hierarchy , ** new_hierarchy_radio , ** new_record_url_hash }
41
45
42
46
class MeiliSearchHelper :
43
47
"""MeiliSearchHelper"""
@@ -54,7 +58,7 @@ class MeiliSearchHelper:
54
58
"level" ,
55
59
"position"
56
60
],
57
- "distinctField" : "url " ,
61
+ "distinctField" : "url_hash " ,
58
62
"rankingRules" : {
59
63
"page_rank" : "dsc" ,
60
64
"level" : "dsc" ,
@@ -96,6 +100,7 @@ def update_schema_based_on(self, record):
96
100
base_schema = {
97
101
'anchor' : ['displayed' ],
98
102
'url' : ['displayed' ],
103
+ 'url_hash' : ['displayed' , 'ranked' ],
99
104
'content' : ['indexed' , 'displayed' ],
100
105
'objectID' : ['identifier' , 'indexed' , 'displayed' ],
101
106
'page_rank' : ['indexed' , 'ranked' ],
0 commit comments