Skip to content

Commit 4793d83

Browse files
authored
Release of 8.16.0 (#354)
* add to release
* update formatting
1 parent d968b49 commit 4793d83

File tree

6 files changed

+163
-182
lines changed

6 files changed

+163
-182
lines changed

.github/workflows/tests.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ jobs:
1717
fail-fast: false
1818
matrix:
1919
es_stack:
20-
- 8.14.2
21-
- 8.15.0
22-
- 8.16.0-SNAPSHOT
20+
- 8.15.3
21+
- 8.16.0
22+
- 8.17.0-SNAPSHOT
2323
runs-on: ubuntu-latest
2424
services:
2525
elasticsearch:

supporting-blog-content/hybrid-search-for-an-e-commerce-product-catalogue/product-store-search/api/api.py

Lines changed: 99 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -11,27 +11,30 @@
1111

1212

1313
def get_client_es():
14-
with open('../config.yml', 'r') as file:
14+
with open("../config.yml", "r") as file:
1515
config = yaml.safe_load(file)
16-
return Elasticsearch(
17-
cloud_id=config['cloud_id'],
18-
api_key=config['api_key']
19-
)
16+
return Elasticsearch(cloud_id=config["cloud_id"], api_key=config["api_key"])
2017

2118

2219
def get_text_vector(sentences):
23-
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
20+
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
2421
embeddings = model.encode(sentences)
2522
return embeddings
2623

2724

2825
def build_query(term=None, categories=None, product_types=None, brands=None):
29-
must_query = [{"match_all": {}}] if not term else [{
30-
"multi_match": {
31-
"query": term,
32-
"fields": ["name", "category", "description"]
33-
}
34-
}]
26+
must_query = (
27+
[{"match_all": {}}]
28+
if not term
29+
else [
30+
{
31+
"multi_match": {
32+
"query": term,
33+
"fields": ["name", "category", "description"],
34+
}
35+
}
36+
]
37+
)
3538

3639
filters = []
3740
if categories:
@@ -42,17 +45,23 @@ def build_query(term=None, categories=None, product_types=None, brands=None):
4245
filters.append({"terms": {"brand.keyword": brands}})
4346

4447
return {
45-
"_source": ["id", "brand", "name", "price", "currency", "image_link", "category", "tag_list"],
46-
"query": {
47-
"bool": {
48-
"must": must_query,
49-
"filter": filters
50-
}
51-
}
48+
"_source": [
49+
"id",
50+
"brand",
51+
"name",
52+
"price",
53+
"currency",
54+
"image_link",
55+
"category",
56+
"tag_list",
57+
],
58+
"query": {"bool": {"must": must_query, "filter": filters}},
5259
}
5360

5461

55-
def build_hybrid_query(term=None, categories=None, product_types=None, brands=None, hybrid=False):
62+
def build_hybrid_query(
63+
term=None, categories=None, product_types=None, brands=None, hybrid=False
64+
):
5665
# Standard query
5766
organic_query = build_query(term, categories, product_types, brands)
5867

@@ -65,81 +74,79 @@ def build_hybrid_query(term=None, categories=None, product_types=None, brands=No
6574
"retriever": {
6675
"rrf": {
6776
"retrievers": [
68-
{
69-
"standard": {
70-
"query": organic_query['query']
71-
}
72-
},
77+
{"standard": {"query": organic_query["query"]}},
7378
{
7479
"knn": {
7580
"field": "description_embeddings",
7681
"query_vector": vector,
7782
"k": 5,
7883
"num_candidates": 20,
79-
"filter": {
80-
"bool": {
81-
"filter": []
82-
}
83-
}
84+
"filter": {"bool": {"filter": []}},
8485
}
85-
}
86+
},
8687
],
8788
"rank_window_size": 20,
88-
"rank_constant": 5
89+
"rank_constant": 5,
8990
}
9091
},
91-
"_source": organic_query['_source']
92+
"_source": organic_query["_source"],
9293
}
9394

9495
if categories:
95-
query['retriever']['rrf']['retrievers'][1]['knn']['filter']['bool']['filter'].append({
96-
"terms": {"category": categories}
97-
})
96+
query["retriever"]["rrf"]["retrievers"][1]["knn"]["filter"]["bool"][
97+
"filter"
98+
].append({"terms": {"category": categories}})
9899
if product_types:
99-
query['retriever']['rrf']['retrievers'][1]['knn']['filter']['bool']['filter'].append({
100-
"terms": {"product_type": product_types}
101-
})
100+
query["retriever"]["rrf"]["retrievers"][1]["knn"]["filter"]["bool"][
101+
"filter"
102+
].append({"terms": {"product_type": product_types}})
102103
if brands:
103-
query['retriever']['rrf']['retrievers'][1]['knn']['filter']['bool']['filter'].append({
104-
"terms": {"brand.keyword": brands}
105-
})
104+
query["retriever"]["rrf"]["retrievers"][1]["knn"]["filter"]["bool"][
105+
"filter"
106+
].append({"terms": {"brand.keyword": brands}})
106107
else:
107108
query = organic_query
108109

109110
return query
110111

111112

112-
def search_products(term, categories=None, product_types=None, brands=None, promote_products=[], hybrid=False):
113+
def search_products(
114+
term,
115+
categories=None,
116+
product_types=None,
117+
brands=None,
118+
promote_products=[],
119+
hybrid=False,
120+
):
113121
query = build_hybrid_query(term, categories, product_types, brands, hybrid)
114122

115123
if promote_products and not hybrid:
116124
query = {
117-
"query": {
118-
"pinned": {
119-
"ids": promote_products,
120-
"organic": query['query']
121-
}
122-
},
123-
"_source": query['_source']
125+
"query": {"pinned": {"ids": promote_products, "organic": query["query"]}},
126+
"_source": query["_source"],
124127
}
125128

126129
print(query)
127130
response = get_client_es().search(index="products-catalog", body=query, size=20)
128131

129132
results = []
130-
for hit in response['hits']['hits']:
133+
for hit in response["hits"]["hits"]:
131134
print(f"Product Name: {hit['_source']['name']}, Score: {hit['_score']}")
132135

133-
results.append({
134-
"id": hit['_source']['id'],
135-
"brand": hit['_source']['brand'],
136-
"name": hit['_source']['name'],
137-
"price": hit['_source']['price'],
138-
"currency": hit['_source']['currency'] if hit['_source']['currency'] else "USD",
139-
"image_link": hit['_source']['image_link'],
140-
"category": hit['_source']['category'],
141-
"tags": hit['_source'].get('tag_list', [])
142-
})
136+
results.append(
137+
{
138+
"id": hit["_source"]["id"],
139+
"brand": hit["_source"]["brand"],
140+
"name": hit["_source"]["name"],
141+
"price": hit["_source"]["price"],
142+
"currency": (
143+
hit["_source"]["currency"] if hit["_source"]["currency"] else "USD"
144+
),
145+
"image_link": hit["_source"]["image_link"],
146+
"category": hit["_source"]["category"],
147+
"tags": hit["_source"].get("tag_list", []),
148+
}
149+
)
143150

144151
return results
145152

@@ -149,51 +156,55 @@ def get_facets_data(term, categories=None, product_types=None, brands=None):
149156
query["aggs"] = {
150157
"product_types": {"terms": {"field": "product_type"}},
151158
"categories": {"terms": {"field": "category"}},
152-
"brands": {"terms": {"field": "brand.keyword"}}
159+
"brands": {"terms": {"field": "brand.keyword"}},
153160
}
154161
response = get_client_es().search(index="products-catalog", body=query, size=0)
155162

156163
return {
157164
"product_types": [
158-
{"product_type": bucket['key'], "count": bucket['doc_count']}
159-
for bucket in response['aggregations']['product_types']['buckets']
165+
{"product_type": bucket["key"], "count": bucket["doc_count"]}
166+
for bucket in response["aggregations"]["product_types"]["buckets"]
160167
],
161168
"categories": [
162-
{"category": bucket['key'], "count": bucket['doc_count']}
163-
for bucket in response['aggregations']['categories']['buckets']
169+
{"category": bucket["key"], "count": bucket["doc_count"]}
170+
for bucket in response["aggregations"]["categories"]["buckets"]
164171
],
165172
"brands": [
166-
{"brand": bucket['key'], "count": bucket['doc_count']}
167-
for bucket in response['aggregations']['brands']['buckets']
168-
]
173+
{"brand": bucket["key"], "count": bucket["doc_count"]}
174+
for bucket in response["aggregations"]["brands"]["buckets"]
175+
],
169176
}
170177

171178

172-
@app.route('/api/products/search', methods=['GET'])
179+
@app.route("/api/products/search", methods=["GET"])
173180
def search():
174-
query = request.args.get('query')
175-
categories = request.args.getlist('selectedCategories[]')
176-
product_types = request.args.getlist('selectedProductTypes[]')
177-
brands = request.args.getlist('selectedBrands[]')
178-
hybrid = request.args.get('hybrid', 'False').lower() == 'true'
179-
results = search_products(query, categories=categories, product_types=product_types,
180-
brands=brands,
181-
promote_products=promote_products_free_gluten,
182-
hybrid=hybrid)
181+
query = request.args.get("query")
182+
categories = request.args.getlist("selectedCategories[]")
183+
product_types = request.args.getlist("selectedProductTypes[]")
184+
brands = request.args.getlist("selectedBrands[]")
185+
hybrid = request.args.get("hybrid", "False").lower() == "true"
186+
results = search_products(
187+
query,
188+
categories=categories,
189+
product_types=product_types,
190+
brands=brands,
191+
promote_products=promote_products_free_gluten,
192+
hybrid=hybrid,
193+
)
183194
return jsonify(results)
184195

185196

186-
@app.route('/api/products/facets', methods=['GET'])
197+
@app.route("/api/products/facets", methods=["GET"])
187198
def facets():
188-
query = request.args.get('query')
189-
categories = request.args.getlist('selectedCategories[]')
190-
product_types = request.args.getlist('selectedProductTypes[]')
191-
brands = request.args.getlist('selectedBrands[]')
192-
results = get_facets_data(query, categories=categories,
193-
product_types=product_types,
194-
brands=brands)
199+
query = request.args.get("query")
200+
categories = request.args.getlist("selectedCategories[]")
201+
product_types = request.args.getlist("selectedProductTypes[]")
202+
brands = request.args.getlist("selectedBrands[]")
203+
results = get_facets_data(
204+
query, categories=categories, product_types=product_types, brands=brands
205+
)
195206
return jsonify(results)
196207

197208

198-
if __name__ == '__main__':
209+
if __name__ == "__main__":
199210
app.run(debug=True)

supporting-blog-content/hybrid-search-for-an-e-commerce-product-catalogue/product-store-search/files/dataset/generate_data.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,26 @@
11
import csv
22
import json
33

4-
desired_fields = ["id", "brand", "name", "price", "price_sign", "currency",
5-
"image_link", "description", "rating", "category",
6-
"product_type", "tag_list"]
4+
desired_fields = [
5+
"id",
6+
"brand",
7+
"name",
8+
"price",
9+
"price_sign",
10+
"currency",
11+
"image_link",
12+
"description",
13+
"rating",
14+
"category",
15+
"product_type",
16+
"tag_list",
17+
]
718

819
input_file = "dataset_products.csv" # Replace with your actual filename
920
output_file = "products.json"
1021

1122
# Open CSV file
12-
with open(input_file, 'r') as csvfile:
23+
with open(input_file, "r") as csvfile:
1324
# Read CSV data using DictReader
1425
csv_reader = csv.DictReader(csvfile)
1526

@@ -37,8 +48,8 @@
3748
json_data.append(product_data)
3849

3950
# Open JSON file for writing
40-
with open(output_file, 'w') as jsonfile:
51+
with open(output_file, "w") as jsonfile:
4152
# Write JSON data to file with indentation
4253
json.dump(json_data, jsonfile, indent=4)
4354

44-
print(f"Converted CSV data to JSON and saved to {output_file}")
55+
print(f"Converted CSV data to JSON and saved to {output_file}")

0 commit comments

Comments
 (0)