Merge pull request #303 from VinciGit00/295-scrapegraph-ai接入oneapi模型qwen-turbo

VinciGit00 · web-flow · commit 1d958be44ddd · 2024-05-26T12:17:34.000+02:00
add OneAPI integration
diff --git a/examples/oneapi/smartscraper_oneapi.py b/examples/oneapi/smartscraper_oneapi.py
@@ -0,0 +1,40 @@
+"""
+Basic example of a scraping pipeline using SmartScraperGraph with a OneAPI-served model
+"""
+
+from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.utils import prettify_exec_info
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "api_key": "***************************",
+        "model": "oneapi/qwen-turbo",
+        "base_url": "http://127.0.0.1:3000/v1",  # OneAPI endpoint URL
+    },
+    "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "base_url": "http://127.0.0.1:11434",  # Ollama endpoint URL
+    }
+}
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="该网站为XXXXX,请提取出标题、发布时间、发布来源以及内容摘要,并以中文回答。",
+    # a string containing already-downloaded HTML code can also be used as the source
+    source="http://XXXX",
+    config=graph_config
+)
+
+# ************************************************
+# Run the graph, then print the answer and the execution info
+# ************************************************
+result = smart_scraper_graph.run()
+print(result)
+print(prettify_exec_info(smart_scraper_graph.get_execution_info()))
diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py
@@ -22,6 +22,7 @@
     HuggingFace,
     Ollama,
     OpenAI,
+    OneApi
 )
 from ..utils.logging import set_verbosity_debug, set_verbosity_warning
 
@@ -55,19 +56,20 @@ class AbstractGraph(ABC):
         ...         # Implementation of graph creation here
         ...         return graph
         ...
-        >>> my_graph = MyGraph("Example Graph", {"llm": {"model": "gpt-3.5-turbo"}}, "example_source")
+        >>> my_graph = MyGraph("Example Graph",
+        ...     {"llm": {"model": "gpt-3.5-turbo"}}, "example_source")
         >>> result = my_graph.run()
     """
 
-    def __init__(self, prompt: str, config: dict, source: Optional[str] = None, schema: Optional[str] = None):
+    def __init__(self, prompt: str, config: dict, 
+                 source: Optional[str] = None, schema: Optional[str] = None):
 
         self.prompt = prompt
         self.source = source
         self.config = config
         self.schema = schema
         self.llm_model = self._create_llm(config["llm"], chat=True)
-        self.embedder_model = self._create_default_embedder(llm_config=config["llm"]
-                                                            ) if "embeddings" not in config else self._create_embedder(
+        self.embedder_model = self._create_default_embedder(llm_config=config["llm"]                                                            ) if "embeddings" not in config else self._create_embedder(
             config["embeddings"])
         self.verbose = False if config is None else config.get(
             "verbose", False)
@@ -99,7 +101,7 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None, sche
             "llm_model": self.llm_model,
             "embedder_model": self.embedder_model
             }
-        
+       
         self.set_common_params(common_params, overwrite=False)
 
         # set burr config
@@ -174,7 +176,14 @@ def _create_llm(self, llm_config: dict, chat=False) -> object:
             except KeyError as exc:
                 raise KeyError("Model not supported") from exc
             return OpenAI(llm_params)
-
+        elif "oneapi" in llm_params["model"]:
+            # take the model name after the last slash
+            llm_params["model"] = llm_params["model"].split("/")[-1]
+            try:
+                self.model_token = models_tokens["oneapi"][llm_params["model"]]
+            except KeyError as exc:
+                raise KeyError("Model Model not supported") from exc
+            return OneApi(llm_params)
         elif "azure" in llm_params["model"]:
             # take the model after the last dash
             llm_params["model"] = llm_params["model"].split("/")[-1]
diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py
@@ -80,6 +80,9 @@
         "snowflake-arctic-embed:l": 8192,
         "mxbai-embed-large": 512,
     },
+    "oneapi": {
+        "qwen-turbo": 16380
+    },
     "groq": {
         "llama3-8b-8192": 8192,
         "llama3-70b-8192": 8192,
diff --git a/scrapegraphai/models/oneapi.py b/scrapegraphai/models/oneapi.py
@@ -0,0 +1,17 @@
+""" 
+OneAPI Module
+"""
+from langchain_openai import ChatOpenAI
+
+
+class OneApi(ChatOpenAI):
+    """
+    A thin wrapper around langchain's ChatOpenAI, used for OneAPI-served models;
+    it could be extended with additional methods if needed.
+
+    Args:
+        llm_config (dict): Keyword arguments forwarded unchanged to ChatOpenAI.
+    """
+
+    def __init__(self, llm_config: dict):
+        super().__init__(**llm_config)