11
11
12
12
13
13
def get_client_es():
    """Create an Elasticsearch client from the credentials in ``../config.yml``.

    The YAML file is re-read on every call. The path is relative, so it is
    resolved against the process's current working directory.

    Returns:
        An ``Elasticsearch`` client built from the file's ``cloud_id`` and
        ``api_key`` entries.

    Raises:
        FileNotFoundError: If ``../config.yml`` does not exist.
        KeyError: If the file lacks ``cloud_id`` or ``api_key``.
    """
    # Explicit encoding so parsing does not depend on the platform's locale default.
    with open("../config.yml", "r", encoding="utf-8") as file:
        config = yaml.safe_load(file)
    return Elasticsearch(cloud_id=config["cloud_id"], api_key=config["api_key"])
20
17
21
18
22
19
def get_text_vector(sentences):
    """Encode ``sentences`` with the ``all-MiniLM-L6-v2`` sentence-transformer.

    The model is constructed on the first call and then reused: it is stored
    on the function object as ``get_text_vector._model`` so later calls skip
    the model construction.

    Args:
        sentences: Input passed straight to ``SentenceTransformer.encode``.

    Returns:
        Whatever ``model.encode(sentences)`` returns.
    """
    model = getattr(get_text_vector, "_model", None)
    if model is None:
        model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
        # Cache on the function so the model is only built once per process.
        get_text_vector._model = model
    return model.encode(sentences)
26
23
27
24
28
25
def build_query (term = None , categories = None , product_types = None , brands = None ):
29
- must_query = [{"match_all" : {}}] if not term else [{
30
- "multi_match" : {
31
- "query" : term ,
32
- "fields" : ["name" , "category" , "description" ]
33
- }
34
- }]
26
+ must_query = (
27
+ [{"match_all" : {}}]
28
+ if not term
29
+ else [
30
+ {
31
+ "multi_match" : {
32
+ "query" : term ,
33
+ "fields" : ["name" , "category" , "description" ],
34
+ }
35
+ }
36
+ ]
37
+ )
35
38
36
39
filters = []
37
40
if categories :
@@ -42,17 +45,23 @@ def build_query(term=None, categories=None, product_types=None, brands=None):
42
45
filters .append ({"terms" : {"brand.keyword" : brands }})
43
46
44
47
return {
45
- "_source" : ["id" , "brand" , "name" , "price" , "currency" , "image_link" , "category" , "tag_list" ],
46
- "query" : {
47
- "bool" : {
48
- "must" : must_query ,
49
- "filter" : filters
50
- }
51
- }
48
+ "_source" : [
49
+ "id" ,
50
+ "brand" ,
51
+ "name" ,
52
+ "price" ,
53
+ "currency" ,
54
+ "image_link" ,
55
+ "category" ,
56
+ "tag_list" ,
57
+ ],
58
+ "query" : {"bool" : {"must" : must_query , "filter" : filters }},
52
59
}
53
60
54
61
55
- def build_hybrid_query (term = None , categories = None , product_types = None , brands = None , hybrid = False ):
62
+ def build_hybrid_query (
63
+ term = None , categories = None , product_types = None , brands = None , hybrid = False
64
+ ):
56
65
# Standard query
57
66
organic_query = build_query (term , categories , product_types , brands )
58
67
@@ -65,81 +74,79 @@ def build_hybrid_query(term=None, categories=None, product_types=None, brands=No
65
74
"retriever" : {
66
75
"rrf" : {
67
76
"retrievers" : [
68
- {
69
- "standard" : {
70
- "query" : organic_query ['query' ]
71
- }
72
- },
77
+ {"standard" : {"query" : organic_query ["query" ]}},
73
78
{
74
79
"knn" : {
75
80
"field" : "description_embeddings" ,
76
81
"query_vector" : vector ,
77
82
"k" : 5 ,
78
83
"num_candidates" : 20 ,
79
- "filter" : {
80
- "bool" : {
81
- "filter" : []
82
- }
83
- }
84
+ "filter" : {"bool" : {"filter" : []}},
84
85
}
85
- }
86
+ },
86
87
],
87
88
"rank_window_size" : 20 ,
88
- "rank_constant" : 5
89
+ "rank_constant" : 5 ,
89
90
}
90
91
},
91
- "_source" : organic_query [' _source' ]
92
+ "_source" : organic_query [" _source" ],
92
93
}
93
94
94
95
if categories :
95
- query [' retriever' ][ ' rrf' ][ ' retrievers' ][1 ][' knn' ][ ' filter' ][ ' bool' ][ 'filter' ]. append ({
96
- "terms" : { "category" : categories }
97
- })
96
+ query [" retriever" ][ " rrf" ][ " retrievers" ][1 ][" knn" ][ " filter" ][ " bool" ][
97
+ "filter"
98
+ ]. append ({ "terms" : { "category" : categories } })
98
99
if product_types :
99
- query [' retriever' ][ ' rrf' ][ ' retrievers' ][1 ][' knn' ][ ' filter' ][ ' bool' ][ 'filter' ]. append ({
100
- "terms" : { "product_type" : product_types }
101
- })
100
+ query [" retriever" ][ " rrf" ][ " retrievers" ][1 ][" knn" ][ " filter" ][ " bool" ][
101
+ "filter"
102
+ ]. append ({ "terms" : { "product_type" : product_types } })
102
103
if brands :
103
- query [' retriever' ][ ' rrf' ][ ' retrievers' ][1 ][' knn' ][ ' filter' ][ ' bool' ][ 'filter' ]. append ({
104
- "terms" : { "brand.keyword" : brands }
105
- })
104
+ query [" retriever" ][ " rrf" ][ " retrievers" ][1 ][" knn" ][ " filter" ][ " bool" ][
105
+ "filter"
106
+ ]. append ({ "terms" : { "brand.keyword" : brands } })
106
107
else :
107
108
query = organic_query
108
109
109
110
return query
110
111
111
112
112
def search_products(
    term,
    categories=None,
    product_types=None,
    brands=None,
    promote_products=None,
    hybrid=False,
):
    """Search the ``products-catalog`` index and return simplified hits.

    The query comes from ``build_hybrid_query``. When ``promote_products`` is
    non-empty and ``hybrid`` is false, that query is wrapped in a ``pinned``
    query whose ``ids`` are the promoted products and whose ``organic`` part
    is the original query.

    Args:
        term: Search text forwarded to ``build_hybrid_query``.
        categories: Optional category filter values.
        product_types: Optional product-type filter values.
        brands: Optional brand filter values.
        promote_products: Optional list of document ids to pin. ``None`` and
            an empty list both mean "no promotion".
        hybrid: Forwarded to ``build_hybrid_query``; also disables pinning.

    Returns:
        A list of dicts with the keys ``id``, ``brand``, ``name``, ``price``,
        ``currency``, ``image_link``, ``category`` and ``tags``; at most 20
        hits are requested.
    """
    query = build_hybrid_query(term, categories, product_types, brands, hybrid)

    # Pinning is only applied to the non-hybrid query, which exposes "query".
    if promote_products and not hybrid:
        query = {
            "query": {"pinned": {"ids": promote_products, "organic": query["query"]}},
            "_source": query["_source"],
        }

    print(query)
    response = get_client_es().search(index="products-catalog", body=query, size=20)

    results = []
    for hit in response["hits"]["hits"]:
        source = hit["_source"]
        print(f"Product Name: {source['name']}, Score: {hit['_score']}")

        results.append(
            {
                "id": source["id"],
                "brand": source["brand"],
                "name": source["name"],
                "price": source["price"],
                # Missing, null or empty currency all fall back to USD.
                "currency": source.get("currency") or "USD",
                "image_link": source["image_link"],
                "category": source["category"],
                "tags": source.get("tag_list", []),
            }
        )

    return results
145
152
@@ -149,51 +156,55 @@ def get_facets_data(term, categories=None, product_types=None, brands=None):
149
156
query ["aggs" ] = {
150
157
"product_types" : {"terms" : {"field" : "product_type" }},
151
158
"categories" : {"terms" : {"field" : "category" }},
152
- "brands" : {"terms" : {"field" : "brand.keyword" }}
159
+ "brands" : {"terms" : {"field" : "brand.keyword" }},
153
160
}
154
161
response = get_client_es ().search (index = "products-catalog" , body = query , size = 0 )
155
162
156
163
return {
157
164
"product_types" : [
158
- {"product_type" : bucket [' key' ], "count" : bucket [' doc_count' ]}
159
- for bucket in response [' aggregations' ][ ' product_types' ][ ' buckets' ]
165
+ {"product_type" : bucket [" key" ], "count" : bucket [" doc_count" ]}
166
+ for bucket in response [" aggregations" ][ " product_types" ][ " buckets" ]
160
167
],
161
168
"categories" : [
162
- {"category" : bucket [' key' ], "count" : bucket [' doc_count' ]}
163
- for bucket in response [' aggregations' ][ ' categories' ][ ' buckets' ]
169
+ {"category" : bucket [" key" ], "count" : bucket [" doc_count" ]}
170
+ for bucket in response [" aggregations" ][ " categories" ][ " buckets" ]
164
171
],
165
172
"brands" : [
166
- {"brand" : bucket [' key' ], "count" : bucket [' doc_count' ]}
167
- for bucket in response [' aggregations' ][ ' brands' ][ ' buckets' ]
168
- ]
173
+ {"brand" : bucket [" key" ], "count" : bucket [" doc_count" ]}
174
+ for bucket in response [" aggregations" ][ " brands" ][ " buckets" ]
175
+ ],
169
176
}
170
177
171
178
172
@app.route("/api/products/search", methods=["GET"])
def search():
    """Handle ``GET /api/products/search`` and return matching products as JSON.

    Query-string parameters read: ``query``, the repeated
    ``selectedCategories[]``, ``selectedProductTypes[]`` and
    ``selectedBrands[]`` lists, and ``hybrid``, which is treated as true only
    when its value equals ``"true"`` case-insensitively. The module-level
    ``promote_products_free_gluten`` list is always passed as the promoted ids.
    """
    args = request.args
    use_hybrid = args.get("hybrid", "False").lower() == "true"
    matches = search_products(
        args.get("query"),
        categories=args.getlist("selectedCategories[]"),
        product_types=args.getlist("selectedProductTypes[]"),
        brands=args.getlist("selectedBrands[]"),
        promote_products=promote_products_free_gluten,
        hybrid=use_hybrid,
    )
    return jsonify(matches)
184
195
185
196
186
@app.route("/api/products/facets", methods=["GET"])
def facets():
    """Handle ``GET /api/products/facets`` and return facet data as JSON.

    Reads ``query`` plus the repeated ``selectedCategories[]``,
    ``selectedProductTypes[]`` and ``selectedBrands[]`` query-string
    parameters and forwards them to ``get_facets_data``.
    """
    args = request.args
    facet_data = get_facets_data(
        args.get("query"),
        categories=args.getlist("selectedCategories[]"),
        product_types=args.getlist("selectedProductTypes[]"),
        brands=args.getlist("selectedBrands[]"),
    )
    return jsonify(facet_data)
196
207
197
208
198
if __name__ == "__main__":
    import os

    # Debug mode stays on by default for local development, but can be
    # switched off (e.g. FLASK_DEBUG=0) so the interactive debugger is not
    # exposed when the script is run anywhere less trusted.
    debug_enabled = os.environ.get("FLASK_DEBUG", "1").lower() not in (
        "0",
        "false",
        "no",
        "off",
    )
    app.run(debug=debug_enabled)
0 commit comments