Skip to content

Commit fd80079

Browse files
authored
Microsoft Copilot with Elasticsearch supporting blog content (#456)
1 parent 36a4a78 commit fd80079

File tree

7 files changed

+830
-0
lines changed

7 files changed

+830
-0
lines changed
Lines changed: 332 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,332 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "386dbd82",
6+
"metadata": {},
7+
"source": [
8+
"# Enhance Microsoft Copilot with Elasticsearch\n",
9+
"\n",
10+
"This notebook runs an API that lets you search for invoices using Elasticsearch, and creates an ngrok tunnel to expose that API to the internet. It is based on the article [Enhance Microsoft Copilot with Elasticsearch](https://www.elastic.co/blog/enhance-microsoft-copilot-with-elasticsearch)."
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": null,
16+
"id": "d460f865",
17+
"metadata": {},
18+
"outputs": [],
19+
"source": [
20+
"%pip install fastapi pyngrok uvicorn nest-asyncio elasticsearch==9 -q"
21+
]
22+
},
23+
{
24+
"cell_type": "code",
25+
"execution_count": null,
26+
"id": "3ac47371",
27+
"metadata": {},
28+
"outputs": [],
29+
"source": [
30+
"import os\n",
31+
"import json\n",
32+
"from getpass import getpass\n",
33+
"from datetime import datetime\n",
34+
"\n",
35+
"import nest_asyncio\n",
36+
"import uvicorn\n",
37+
"\n",
38+
"from fastapi import FastAPI, HTTPException, Query\n",
39+
"from pyngrok import conf, ngrok\n",
40+
"\n",
41+
"from elasticsearch.helpers import bulk\n",
42+
"from elasticsearch import Elasticsearch"
43+
]
44+
},
45+
{
46+
"cell_type": "markdown",
47+
"id": "64167eee",
48+
"metadata": {},
49+
"source": [
50+
"## Setup Variables"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": null,
56+
"id": "aa378fdb",
57+
"metadata": {},
58+
"outputs": [],
59+
"source": [
60+
"os.environ[\"ELASTICSEARCH_ENDPOINT\"] = getpass(\"Elastic Endpoint: \")\n",
61+
"os.environ[\"ELASTICSEARCH_API_KEY\"] = getpass(\"Elastic Api Key: \")\n",
62+
"os.environ[\"NGROK_AUTH_TOKEN\"] = getpass(\"Ngrok Auth Token: \")\n",
63+
"\n",
64+
"\n",
65+
"INDEX_NAME = \"invoices\""
66+
]
67+
},
68+
{
69+
"cell_type": "markdown",
70+
"id": "31041b60",
71+
"metadata": {},
72+
"source": [
73+
"## Elasticsearch client"
74+
]
75+
},
76+
{
77+
"cell_type": "code",
78+
"execution_count": null,
79+
"id": "4d8a8201",
80+
"metadata": {},
81+
"outputs": [],
82+
"source": [
83+
"_client = Elasticsearch(\n",
84+
" os.environ[\"ELASTICSEARCH_ENDPOINT\"],\n",
85+
" api_key=os.environ[\"ELASTICSEARCH_API_KEY\"],\n",
86+
")"
87+
]
88+
},
89+
{
90+
"cell_type": "markdown",
91+
"id": "07578680",
92+
"metadata": {},
93+
"source": [
94+
"## Mappings"
95+
]
96+
},
97+
{
98+
"cell_type": "code",
99+
"execution_count": null,
100+
"id": "c34a804a",
101+
"metadata": {},
102+
"outputs": [],
103+
"source": [
104+
"try:\n",
105+
" _client.indices.create(\n",
106+
" index=INDEX_NAME,\n",
107+
" body={\n",
108+
" \"mappings\": {\n",
109+
" \"properties\": {\n",
110+
" \"id\": {\"type\": \"keyword\"},\n",
111+
" \"file_url\": {\"type\": \"keyword\"},\n",
112+
" \"issue_date\": {\"type\": \"date\"},\n",
113+
" \"description\": {\"type\": \"text\", \"copy_to\": \"semantic_field\"},\n",
114+
" \"services\": {\n",
115+
" \"type\": \"object\",\n",
116+
" \"properties\": {\n",
117+
" \"name\": {\n",
118+
" \"type\": \"text\",\n",
119+
" \"copy_to\": \"semantic_field\",\n",
120+
" },\n",
121+
" \"price\": {\"type\": \"float\"},\n",
122+
" },\n",
123+
" },\n",
124+
" \"total_amount\": {\n",
125+
" \"type\": \"float\",\n",
126+
" },\n",
127+
" \"semantic_field\": {\"type\": \"semantic_text\"},\n",
128+
" }\n",
129+
" }\n",
130+
" },\n",
131+
" )\n",
132+
"\n",
133+
" print(\"index created successfully\")\n",
134+
"except Exception as e:\n",
135+
" print(\n",
136+
" f\"Error creating inference endpoint: {e.info['error']['root_cause'][0]['reason'] }\"\n",
137+
" )"
138+
]
139+
},
140+
{
141+
"cell_type": "markdown",
142+
"id": "02a2c25a",
143+
"metadata": {},
144+
"source": [
145+
"## Ingesting documents to Elasticsearch"
146+
]
147+
},
148+
{
149+
"cell_type": "code",
150+
"execution_count": null,
151+
"id": "69f388c0",
152+
"metadata": {},
153+
"outputs": [],
154+
"source": [
155+
"with open(\"invoices_data.json\", \"r\", encoding=\"utf-8\") as f:\n",
156+
" invoices = json.load(f)"
157+
]
158+
},
159+
{
160+
"cell_type": "code",
161+
"execution_count": null,
162+
"id": "b96c42fb",
163+
"metadata": {},
164+
"outputs": [],
165+
"source": [
166+
"def build_data():\n",
167+
" for doc in invoices:\n",
168+
" yield {\"_index\": INDEX_NAME, \"_source\": doc}\n",
169+
"\n",
170+
"\n",
171+
"try:\n",
172+
" success, errors = bulk(_client, build_data())\n",
173+
" print(f\"{success} documents indexed successfully\")\n",
174+
"\n",
175+
" if errors:\n",
176+
" print(\"Errors during indexing:\", errors)\n",
177+
"\n",
178+
"except Exception as e:\n",
179+
" print(f\"Error: {str(e)}, please wait some seconds and try again.\")"
180+
]
181+
},
182+
{
183+
"cell_type": "markdown",
184+
"id": "d38c1869",
185+
"metadata": {},
186+
"source": [
187+
"## Building API"
188+
]
189+
},
190+
{
191+
"cell_type": "code",
192+
"execution_count": null,
193+
"id": "2ad221fb",
194+
"metadata": {},
195+
"outputs": [],
196+
"source": [
197+
"app = FastAPI()"
198+
]
199+
},
200+
{
201+
"cell_type": "code",
202+
"execution_count": null,
203+
"id": "76106dad",
204+
"metadata": {},
205+
"outputs": [],
206+
"source": [
207+
"@app.get(\"/search/semantic\")\n",
208+
"async def search_semantic(query: str = Query(None)):\n",
209+
" try:\n",
210+
" result = _client.search(\n",
211+
" index=INDEX_NAME,\n",
212+
" query={\n",
213+
" \"semantic\": {\n",
214+
" \"field\": \"semantic_field\",\n",
215+
" \"query\": query,\n",
216+
" }\n",
217+
" },\n",
218+
" )\n",
219+
"\n",
220+
" hits = result[\"hits\"][\"hits\"]\n",
221+
" results = [{\"score\": hit[\"_score\"], **hit[\"_source\"]} for hit in hits]\n",
222+
"\n",
223+
" return results\n",
224+
" except Exception as e:\n",
225+
" return Exception(f\"Error: {str(e)}\")\n",
226+
"\n",
227+
"\n",
228+
"@app.get(\"/search/by-date\")\n",
229+
"async def search_by_date(from_date: str = Query(None), to_date: str = Query(None)):\n",
230+
" try:\n",
231+
" from_dt = datetime.strptime(from_date, \"%m/%d/%Y %I:%M:%S %p\")\n",
232+
" to_dt = datetime.strptime(to_date, \"%m/%d/%Y %I:%M:%S %p\")\n",
233+
"\n",
234+
" formatted_from = from_dt.strftime(\"%d/%m/%Y\")\n",
235+
" formatted_to = to_dt.strftime(\"%d/%m/%Y\")\n",
236+
"\n",
237+
" result = _client.search(\n",
238+
" index=INDEX_NAME,\n",
239+
" query={\n",
240+
" \"range\": {\n",
241+
" \"issue_date\": {\n",
242+
" \"gte\": formatted_from,\n",
243+
" \"lte\": formatted_to,\n",
244+
" \"format\": \"dd/MM/yyyy\",\n",
245+
" }\n",
246+
" }\n",
247+
" },\n",
248+
" )\n",
249+
"\n",
250+
" hits = result[\"hits\"][\"hits\"]\n",
251+
" results = [hit[\"_source\"] for hit in hits]\n",
252+
"\n",
253+
" return results\n",
254+
" except Exception as e:\n",
255+
" return Exception(f\"Error: {str(e)}\")"
256+
]
257+
},
258+
{
259+
"cell_type": "markdown",
260+
"id": "cf1460e9",
261+
"metadata": {},
262+
"source": [
263+
"## Running the API"
264+
]
265+
},
266+
{
267+
"cell_type": "code",
268+
"execution_count": null,
269+
"id": "517c85c3",
270+
"metadata": {},
271+
"outputs": [],
272+
"source": [
273+
"conf.get_default().auth_token = os.environ[\"NGROK_AUTH_TOKEN\"]\n",
274+
"ngrok_tunnel = ngrok.connect(8000)\n",
275+
"\n",
276+
"print(\"Public URL:\", ngrok_tunnel.public_url)\n",
277+
"\n",
278+
"nest_asyncio.apply()\n",
279+
"uvicorn.run(app, port=8000)"
280+
]
281+
},
282+
{
283+
"cell_type": "markdown",
284+
"id": "ccffd29a",
285+
"metadata": {},
286+
"source": [
287+
"## Delete the index"
288+
]
289+
},
290+
{
291+
"cell_type": "code",
292+
"execution_count": null,
293+
"id": "991ba4e4",
294+
"metadata": {},
295+
"outputs": [],
296+
"source": [
297+
"def print_results(results):\n",
298+
" if results.get(\"acknowledged\", False):\n",
299+
" print(\"DELETED successfully.\")\n",
300+
"\n",
301+
" if \"error\" in results:\n",
302+
" print(f\"ERROR: {results['error']['root_cause'][0]['reason']}\")\n",
303+
"\n",
304+
"\n",
305+
"# Cleanup - Delete Index\n",
306+
"result = _client.indices.delete(index=INDEX_NAME, ignore=[400, 404])\n",
307+
"print_results(result)"
308+
]
309+
}
310+
],
311+
"metadata": {
312+
"kernelspec": {
313+
"display_name": "Python 3",
314+
"language": "python",
315+
"name": "python3"
316+
},
317+
"language_info": {
318+
"codemirror_mode": {
319+
"name": "ipython",
320+
"version": 3
321+
},
322+
"file_extension": ".py",
323+
"mimetype": "text/x-python",
324+
"name": "python",
325+
"nbconvert_exporter": "python",
326+
"pygments_lexer": "ipython3",
327+
"version": "3.13.2"
328+
}
329+
},
330+
"nbformat": 4,
331+
"nbformat_minor": 5
332+
}

0 commit comments

Comments
 (0)