Skip to content

Commit 7affdf0

Browse files
authored
SingleStore datasource: query from SingleStore service (#130)
1 parent c6d82c3 commit 7affdf0

File tree

2 files changed

+108
-30
lines changed

2 files changed

+108
-30
lines changed
Lines changed: 62 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,67 @@
import csv
import io
import json
from typing import Dict, List, Optional

from pydantic import Field

from llmstack.common.blocks.base.schema import BaseSchema
from llmstack.common.blocks.data.store.vectorstore import Document
from llmstack.common.utils.models import Config
from llmstack.common.utils.prequests import post
from llmstack.datasources.handlers.datasource_processor import DataSourceProcessor, DataSourceSchema
from llmstack.datasources.models import DataSource
712

813
class SingleStoreConnection(BaseSchema):
    """Connection parameters for a SingleStore instance.

    Stored (encrypted) as part of the datasource configuration and used by
    the processor to reach the SingleStore HTTP Data API.
    """
    host: str = Field(description='Host of the SingleStore instance')
    port: int = Field(
        description='Port number to connect to the SingleStore instance')
    username: str = Field(description='SingleStore username')
    password: str = Field(description='SingleStore password')
    database: str = Field(description='SingleStore database name')
1620

21+
1722
class SingleStoreDatabaseSchema(DataSourceSchema):
    """Datasource schema wrapping optional SingleStore connection details."""
    connection: Optional[SingleStoreConnection] = Field(
        description='SingleStore connection details')
25+
26+
2027
class SingleStoreConnectionConfiguration(Config):
    """Encrypted persisted config record for a SingleStore datasource."""
    config_type = 'singlestore_connection'
    # Connection credentials are encrypted at rest via the profile's
    # encrypt/decrypt hooks (see process_validate_config / __init__).
    is_encrypted = True
    # Raw SingleStoreDatabaseSchema payload as a plain dict.
    singlestore_config: Optional[Dict]
24-
31+
32+
2533
class SingleStoreDataSource(DataSourceProcessor[SingleStoreDatabaseSchema]):
2634
def __init__(self, datasource: DataSource):
2735
self.datasource = datasource
28-
36+
if self.datasource.config and 'data' in self.datasource.config:
37+
config_dict = SingleStoreConnectionConfiguration().from_dict(
38+
self.datasource.config, self.datasource.profile.decrypt_value)
39+
self._configuration = SingleStoreDatabaseSchema(
40+
**config_dict['singlestore_config'])
41+
self._source_name = self.datasource.name
42+
2943
    @staticmethod
    def name() -> str:
        """Human-readable display name of this datasource type."""
        return 'Single Store'
32-
46+
3347
    @staticmethod
    def slug() -> str:
        """URL-safe identifier for this datasource type."""
        return 'singlestore'
36-
50+
3751
    @staticmethod
    def description() -> str:
        """Short user-facing description of the datasource."""
        return 'Single Store is a distributed SQL database that can be deployed anywhere.'
40-
54+
4155
    @staticmethod
    def provider_slug() -> str:
        """Slug of the provider this datasource belongs to."""
        return 'singlestore'
44-
58+
4559
@staticmethod
4660
def process_validate_config(config_data: dict, datasource: DataSource) -> dict:
47-
return SingleStoreConnectionConfiguration(singlestore_config=config_data).to_dict(
61+
return SingleStoreConnectionConfiguration(singlestore_config=config_data).to_dict(
4862
encrypt_fn=datasource.profile.encrypt_value
4963
)
50-
64+
5165
    def validate_and_process(self, data: dict):
        """Not supported: this datasource is query-only."""
        raise NotImplementedError
5367

@@ -56,16 +70,44 @@ def get_data_documents(self, data: dict):
5670

5771
    def add_entry(self, data: dict):
        """Not supported: entries live in the external SingleStore database."""
        raise NotImplementedError
59-
73+
74+
def _sql_search(self, query: str, **kwargs):
75+
if self._configuration.connection.host.startswith('https'):
76+
url = f'{self._configuration.connection.host}/api/v2/query/rows'
77+
else:
78+
url = f'https://{self._configuration.connection.host}/api/v2/query/rows'
79+
80+
headers = {
81+
'Accept': 'application/json',
82+
}
83+
data = {
84+
'sql': query,
85+
'database': self._configuration.connection.database
86+
}
87+
88+
response = post(url, headers=headers, data=json.dumps(data), auth=(
89+
self._configuration.connection.username, self._configuration.connection.password))
90+
response.raise_for_status()
91+
# JSON to csv
92+
csv_result = ''
93+
if 'results' in response.json():
94+
if len(response.json()['results']) > 0 and 'rows' in response.json()['results'][0]:
95+
rows = response.json()['results'][0]['rows']
96+
if len(rows) > 0:
97+
csv_result += ','.join(list(map(lambda entry: str(entry),
98+
rows[0].keys()))) + '\n'
99+
for row in rows:
100+
csv_result += ','.join(list(map(lambda entry: str(entry),
101+
row.values()))) + '\n'
102+
103+
return [Document(page_content_key='content', page_content=csv_result, metadata={'score': 0, 'source': self._source_name})]
104+
60105
    def similarity_search(self, query: str, **kwargs) -> List[dict]:
        """Answer a search by running `query` as raw SQL via the Data API.

        NOTE(review): no embedding-based similarity is performed here —
        the query string is forwarded verbatim to SQL execution.
        """
        return self._sql_search(query, **kwargs)
107+
64108
    def hybrid_search(self, query: str, **kwargs) -> List[dict]:
        """Alias of similarity_search: forwards `query` verbatim as SQL."""
        return self._sql_search(query, **kwargs)
110+
69111
    def delete_entry(self, data: dict):
        """Not supported: entries live in the external SingleStore database."""
        raise NotImplementedError
71113

@@ -76,4 +118,4 @@ def delete_all_entries(self):
76118
raise NotImplementedError
77119

78120
    def get_entry_text(self, data: dict) -> str:
        # NOTE(review): despite the `-> str` annotation this returns a
        # (None, message) tuple — confirm callers unpack two values and
        # whether the base class declares the same return shape.
        return None, "External Datasource does not support entry text"

llmstack/fixtures/initial_data.json

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,9 @@
9191
"api_endpoint": "completions",
9292
"params": {
9393
"type": "object",
94-
"required": ["model"],
94+
"required": [
95+
"model"
96+
],
9597
"properties": {
9698
"n": {
9799
"type": "integer",
@@ -178,7 +180,11 @@
178180
"description": "The number of images to generate. Must be between 1 and 10."
179181
},
180182
"size": {
181-
"enum": ["256x256", "512x512", "1024x1024"],
183+
"enum": [
184+
"256x256",
185+
"512x512",
186+
"1024x1024"
187+
],
182188
"type": "string",
183189
"default": "1024x1024",
184190
"description": "The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024."
@@ -188,7 +194,10 @@
188194
"description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse."
189195
},
190196
"response_format": {
191-
"enum": ["url", "b64_json"],
197+
"enum": [
198+
"url",
199+
"b64_json"
200+
],
192201
"type": "string",
193202
"default": "url",
194203
"description": "The format in which the generated images are returned. Must be one of url or b64_json."
@@ -211,7 +220,10 @@
211220
"api_endpoint": "text2image",
212221
"params": {
213222
"type": "object",
214-
"required": ["engine", "seed"],
223+
"required": [
224+
"engine",
225+
"seed"
226+
],
215227
"properties": {
216228
"seed": {
217229
"type": "integer",
@@ -296,7 +308,9 @@
296308
"api_endpoint": "generate",
297309
"params": {
298310
"type": "object",
299-
"required": ["model"],
311+
"required": [
312+
"model"
313+
],
300314
"properties": {
301315
"k": {
302316
"type": "integer",
@@ -321,7 +335,11 @@
321335
"description": "The ID of a custom playground preset. You can create presets in the playground. If you use a preset, the prompt parameter becomes optional, and any included parameters will override the preset's parameters."
322336
},
323337
"truncate": {
324-
"enum": ["NONE", "START", "END"],
338+
"enum": [
339+
"NONE",
340+
"START",
341+
"END"
342+
],
325343
"type": "string",
326344
"default": "END",
327345
"description": "Passing START will discard the start of the input. END will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.If NONE is selected, when the input exceeds the maximum input token length an error will be returned."
@@ -364,7 +382,11 @@
364382
"description": "Can be used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation."
365383
},
366384
"return_likelihoods": {
367-
"enum": ["GENERATION", "ALL", "NONE"],
385+
"enum": [
386+
"GENERATION",
387+
"ALL",
388+
"NONE"
389+
],
368390
"type": "string",
369391
"default": "NONE",
370392
"description": "If GENERATION is selected, the token likelihoods will only be provided for generated text.If ALL is selected, the token likelihoods will be provided both for the prompt and the generated text."
@@ -387,7 +409,9 @@
387409
"api_endpoint": "chat/completions",
388410
"params": {
389411
"type": "object",
390-
"required": ["model"],
412+
"required": [
413+
"model"
414+
],
391415
"properties": {
392416
"n": {
393417
"type": "integer",
@@ -399,7 +423,10 @@
399423
"description": "Up to 4 sequences where the API will stop generating further tokens."
400424
},
401425
"model": {
402-
"enum": ["gpt-3.5-turbo", "gpt-3.5-turbo-0301"],
426+
"enum": [
427+
"gpt-3.5-turbo",
428+
"gpt-3.5-turbo-0301"
429+
],
403430
"type": "string",
404431
"default": "gpt-3.5-turbo",
405432
"description": "ID of the model to use. Currently, only gpt-3.5-turbo and gpt-3.5-turbo-0301 are supported."
@@ -748,5 +775,14 @@
748775
"slug": "weaviate",
749776
"description": ""
750777
}
778+
},
779+
{
780+
"model": "datasources.datasourcetype",
781+
"pk": 6,
782+
"fields": {
783+
"name": "SingleStore",
784+
"slug": "singlestore",
785+
"description": ""
786+
}
751787
}
752-
]
788+
]

0 commit comments

Comments
 (0)