Skip to content

Commit 7affdf0

Browse files
authored
SingleStore datasource: query from SingleStore service (#130)
1 parent c6d82c3 commit 7affdf0

File tree

2 files changed

+108
-30
lines changed

2 files changed

+108
-30
lines changed
Lines changed: 62 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,67 @@
import csv
import io
import json
from typing import Dict, List, Optional

from pydantic import Field

from llmstack.common.blocks.base.schema import BaseSchema
from llmstack.common.blocks.data.store.vectorstore import Document
from llmstack.common.utils.models import Config
from llmstack.common.utils.prequests import post
from llmstack.datasources.handlers.datasource_processor import DataSourceProcessor, DataSourceSchema
from llmstack.datasources.models import DataSource
712

813
class SingleStoreConnection(BaseSchema):
    """Connection parameters for a SingleStore instance.

    Stored (encrypted) as part of the datasource configuration and used by
    the processor to reach the SingleStore HTTP Data API.
    """
    host: str = Field(description='Host of the SingleStore instance')
    port: int = Field(
        description='Port number to connect to the SingleStore instance')
    username: str = Field(description='SingleStore username')
    password: str = Field(description='SingleStore password')
    database: str = Field(description='SingleStore database name')
1620

21+
1722
class SingleStoreDatabaseSchema(DataSourceSchema):
    """Datasource schema wrapping optional SingleStore connection details."""
    connection: Optional[SingleStoreConnection] = Field(
        description='SingleStore connection details')
25+
26+
2027
class SingleStoreConnectionConfiguration(Config):
    """Encrypted persisted config record for a SingleStore datasource."""
    config_type = 'singlestore_connection'
    # Connection credentials are encrypted at rest via the profile's
    # encrypt/decrypt hooks (see process_validate_config / __init__).
    is_encrypted = True
    # Raw SingleStoreDatabaseSchema payload as a plain dict.
    singlestore_config: Optional[Dict]
24-
31+
32+
2533
class SingleStoreDataSource(DataSourceProcessor[SingleStoreDatabaseSchema]):
2634
def __init__(self, datasource: DataSource):
2735
self.datasource = datasource
28-
36+
if self.datasource.config and 'data' in self.datasource.config:
37+
config_dict = SingleStoreConnectionConfiguration().from_dict(
38+
self.datasource.config, self.datasource.profile.decrypt_value)
39+
self._configuration = SingleStoreDatabaseSchema(
40+
**config_dict['singlestore_config'])
41+
self._source_name = self.datasource.name
42+
2943
    @staticmethod
    def name() -> str:
        """Human-readable display name of this datasource type."""
        return 'Single Store'
32-
46+
3347
    @staticmethod
    def slug() -> str:
        """URL-safe identifier for this datasource type."""
        return 'singlestore'
36-
50+
3751
    @staticmethod
    def description() -> str:
        """Short user-facing description of the datasource."""
        return 'Single Store is a distributed SQL database that can be deployed anywhere.'
40-
54+
4155
    @staticmethod
    def provider_slug() -> str:
        """Slug of the provider this datasource belongs to."""
        return 'singlestore'
44-
58+
4559
@staticmethod
4660
def process_validate_config(config_data: dict, datasource: DataSource) -> dict:
47-
return SingleStoreConnectionConfiguration(singlestore_config=config_data).to_dict(
61+
return SingleStoreConnectionConfiguration(singlestore_config=config_data).to_dict(
4862
encrypt_fn=datasource.profile.encrypt_value
4963
)
50-
64+
5165
    def validate_and_process(self, data: dict):
        """Not supported: this datasource is query-only."""
        raise NotImplementedError
5367

@@ -56,16 +70,44 @@ def get_data_documents(self, data: dict):
5670

5771
    def add_entry(self, data: dict):
        """Not supported: entries live in the external SingleStore database."""
        raise NotImplementedError
59-
73+
74+
def _sql_search(self, query: str, **kwargs):
75+
if self._configuration.connection.host.startswith('https'):
76+
url = f'{self._configuration.connection.host}/api/v2/query/rows'
77+
else:
78+
url = f'https://{self._configuration.connection.host}/api/v2/query/rows'
79+
80+
headers = {
81+
'Accept': 'application/json',
82+
}
83+
data = {
84+
'sql': query,
85+
'database': self._configuration.connection.database
86+
}
87+
88+
response = post(url, headers=headers, data=json.dumps(data), auth=(
89+
self._configuration.connection.username, self._configuration.connection.password))
90+
response.raise_for_status()
91+
# JSON to csv
92+
csv_result = ''
93+
if 'results' in response.json():
94+
if len(response.json()['results']) > 0 and 'rows' in response.json()['results'][0]:
95+
rows = response.json()['results'][0]['rows']
96+
if len(rows) > 0:
97+
csv_result += ','.join(list(map(lambda entry: str(entry),
98+
rows[0].keys()))) + '\n'
99+
for row in rows:
100+
csv_result += ','.join(list(map(lambda entry: str(entry),
101+
row.values()))) + '\n'
102+
103+
return [Document(page_content_key='content', page_content=csv_result, metadata={'score': 0, 'source': self._source_name})]
104+
60105
    def similarity_search(self, query: str, **kwargs) -> List[dict]:
        """Answer a search by running `query` as raw SQL via the Data API.

        NOTE(review): no embedding-based similarity is performed here —
        the query string is forwarded verbatim to SQL execution.
        """
        return self._sql_search(query, **kwargs)
107+
64108
    def hybrid_search(self, query: str, **kwargs) -> List[dict]:
        """Alias of similarity_search: forwards `query` verbatim as SQL."""
        return self._sql_search(query, **kwargs)
110+
69111
    def delete_entry(self, data: dict):
        """Not supported: entries live in the external SingleStore database."""
        raise NotImplementedError
71113

@@ -76,4 +118,4 @@ def delete_all_entries(self):
76118
raise NotImplementedError
77119

78120
    def get_entry_text(self, data: dict) -> str:
        # NOTE(review): despite the `-> str` annotation this returns a
        # (None, message) tuple — confirm callers unpack two values and
        # whether the base class declares the same return shape.
        return None, "External Datasource does not support entry text"

llmstack/fixtures/initial_data.json

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,9 @@
9191
"api_endpoint": "completions",
9292
"params": {
9393
"type": "object",
94-
"required": ["model"],
94+
"required": [
95+
"model"
96+
],
9597
"properties": {
9698
"n": {
9799
"type": "integer",
@@ -178,7 +180,11 @@
178180
"description": "The number of images to generate. Must be between 1 and 10."
179181
},
180182
"size": {
181-
"enum": ["256x256", "512x512", "1024x1024"],
183+
"enum": [
184+
"256x256",
185+
"512x512",
186+
"1024x1024"
187+
],
182188
"type": "string",
183189
"default": "1024x1024",
184190
"description": "The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024."
@@ -188,7 +194,10 @@
188194
"description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse."
189195
},
190196
"response_format": {
191-
"enum": ["url", "b64_json"],
197+
"enum": [
198+
"url",
199+
"b64_json"
200+
],
192201
"type": "string",
193202
"default": "url",
194203
"description": "The format in which the generated images are returned. Must be one of url or b64_json."
@@ -211,7 +220,10 @@
211220
"api_endpoint": "text2image",
212221
"params": {
213222
"type": "object",
214-
"required": ["engine", "seed"],
223+
"required": [
224+
"engine",
225+
"seed"
226+
],
215227
"properties": {
216228
"seed": {
217229
"type": "integer",
@@ -296,7 +308,9 @@
296308
"api_endpoint": "generate",
297309
"params": {
298310
"type": "object",
299-
"required": ["model"],
311+
"required": [
312+
"model"
313+
],
300314
"properties": {
301315
"k": {
302316
"type": "integer",
@@ -321,7 +335,11 @@
321335
"description": "The ID of a custom playground preset. You can create presets in the playground. If you use a preset, the prompt parameter becomes optional, and any included parameters will override the preset's parameters."
322336
},
323337
"truncate": {
324-
"enum": ["NONE", "START", "END"],
338+
"enum": [
339+
"NONE",
340+
"START",
341+
"END"
342+
],
325343
"type": "string",
326344
"default": "END",
327345
"description": "Passing START will discard the start of the input. END will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.If NONE is selected, when the input exceeds the maximum input token length an error will be returned."
@@ -364,7 +382,11 @@
364382
"description": "Can be used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation."
365383
},
366384
"return_likelihoods": {
367-
"enum": ["GENERATION", "ALL", "NONE"],
385+
"enum": [
386+
"GENERATION",
387+
"ALL",
388+
"NONE"
389+
],
368390
"type": "string",
369391
"default": "NONE",
370392
"description": "If GENERATION is selected, the token likelihoods will only be provided for generated text.If ALL is selected, the token likelihoods will be provided both for the prompt and the generated text."
@@ -387,7 +409,9 @@
387409
"api_endpoint": "chat/completions",
388410
"params": {
389411
"type": "object",
390-
"required": ["model"],
412+
"required": [
413+
"model"
414+
],
391415
"properties": {
392416
"n": {
393417
"type": "integer",
@@ -399,7 +423,10 @@
399423
"description": "Up to 4 sequences where the API will stop generating further tokens."
400424
},
401425
"model": {
402-
"enum": ["gpt-3.5-turbo", "gpt-3.5-turbo-0301"],
426+
"enum": [
427+
"gpt-3.5-turbo",
428+
"gpt-3.5-turbo-0301"
429+
],
403430
"type": "string",
404431
"default": "gpt-3.5-turbo",
405432
"description": "ID of the model to use. Currently, only gpt-3.5-turbo and gpt-3.5-turbo-0301 are supported."
@@ -748,5 +775,14 @@
748775
"slug": "weaviate",
749776
"description": ""
750777
}
778+
},
779+
{
780+
"model": "datasources.datasourcetype",
781+
"pk": 6,
782+
"fields": {
783+
"name": "SingleStore",
784+
"slug": "singlestore",
785+
"description": ""
786+
}
751787
}
752-
]
788+
]

0 commit comments

Comments
 (0)