Skip to content

Commit 5100368

Browse files
committed
Merge branch 'pre/beta' of https://github.com/ScrapeGraphAI/Scrapegraph-ai into pre/beta
2 parents b115f96 + 333f09a commit 5100368

File tree

4 files changed

+88
-85
lines changed

4 files changed

+88
-85
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
## [1.14.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.14.0-beta.3...v1.14.0-beta.4) (2024-08-15)
2+
3+
4+
### Features
5+
6+
* update abstract graph ([c77231c](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/c77231c983bd6e154eefd26422cd156da4c8b7bb))
7+
18
## [1.14.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.14.0-beta.2...v1.14.0-beta.3) (2024-08-13)
29

310

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name = "scrapegraphai"
33

44

5-
version = "1.14.0b3"
5+
version = "1.14.0b4"
66

77

88
description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."

scrapegraphai/graphs/abstract_graph.py

Lines changed: 78 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -146,90 +146,84 @@ def handle_model(model_name, provider, token_key, default_token=8192):
146146
with warnings.catch_warnings():
147147
warnings.simplefilter("ignore")
148148
return init_chat_model(**llm_params)
149-
150-
if "azure" in llm_params["model"]:
151-
model_name = llm_params["model"].split("/")[-1]
152-
return handle_model(model_name, "azure_openai", model_name)
153-
154-
if "gpt-" in llm_params["model"]:
155-
return handle_model(llm_params["model"], "openai", llm_params["model"])
156-
157-
if "fireworks" in llm_params["model"]:
158-
model_name = "/".join(llm_params["model"].split("/")[1:])
159-
token_key = llm_params["model"].split("/")[-1]
160-
return handle_model(model_name, "fireworks", token_key)
161-
162-
if "gemini" in llm_params["model"]:
163-
model_name = llm_params["model"].split("/")[-1]
164-
return handle_model(model_name, "google_genai", model_name)
165-
166-
if llm_params["model"].startswith("claude"):
167-
model_name = llm_params["model"].split("/")[-1]
168-
return handle_model(model_name, "anthropic", model_name)
169-
170-
if llm_params["model"].startswith("vertexai"):
171-
return handle_model(llm_params["model"], "google_vertexai", llm_params["model"])
172149

173-
if "ollama" in llm_params["model"]:
174-
model_name = llm_params["model"].split("ollama/")[-1]
175-
token_key = model_name if "model_tokens" not in llm_params else llm_params["model_tokens"]
176-
return handle_model(model_name, "ollama", token_key)
177-
178-
if "hugging_face" in llm_params["model"]:
179-
model_name = llm_params["model"].split("/")[-1]
180-
return handle_model(model_name, "hugging_face", model_name)
181-
182-
if "groq" in llm_params["model"]:
183-
model_name = llm_params["model"].split("/")[-1]
184-
return handle_model(model_name, "groq", model_name)
185-
186-
if "bedrock" in llm_params["model"]:
187-
model_name = llm_params["model"].split("/")[-1]
188-
return handle_model(model_name, "bedrock", model_name)
189-
190-
if "claude-3-" in llm_params["model"]:
191-
return handle_model(llm_params["model"], "anthropic", "claude3")
192-
193-
if llm_params["model"].startswith("mistral"):
194-
model_name = llm_params["model"].split("/")[-1]
195-
return handle_model(model_name, "mistralai", model_name)
196-
197-
# Instantiate the language model based on the model name (models that do not use the common interface)
198-
if "deepseek" in llm_params["model"]:
199-
try:
200-
self.model_token = models_tokens["deepseek"][llm_params["model"]]
201-
except KeyError:
202-
print("model not found, using default token size (8192)")
203-
self.model_token = 8192
204-
return DeepSeek(llm_params)
205-
206-
if "ernie" in llm_params["model"]:
207-
try:
208-
self.model_token = models_tokens["ernie"][llm_params["model"]]
209-
except KeyError:
210-
print("model not found, using default token size (8192)")
211-
self.model_token = 8192
212-
return ErnieBotChat(**llm_params)
213-
214-
if "oneapi" in llm_params["model"]:
215-
# take the model after the last dash
216-
llm_params["model"] = llm_params["model"].split("/")[-1]
217-
try:
218-
self.model_token = models_tokens["oneapi"][llm_params["model"]]
219-
except KeyError as exc:
220-
raise KeyError("Model not supported") from exc
221-
return OneApi(llm_params)
222-
223-
if "nvidia" in llm_params["model"]:
224-
try:
225-
self.model_token = models_tokens["nvidia"][llm_params["model"].split("/")[-1]]
226-
llm_params["model"] = "/".join(llm_params["model"].split("/")[1:])
227-
except KeyError as exc:
228-
raise KeyError("Model not supported") from exc
229-
return ChatNVIDIA(**llm_params)
230-
231-
# Raise an error if the model did not match any of the previous cases
232-
raise ValueError("Model provided by the configuration not supported")
150+
known_models = ["openai", "azure_openai", "google_genai", "ollama", "oneapi", "nvidia", "groq", "google_vertexai", "bedrock", "mistralai", "hugging_face", "deepseek", "ernie", "fireworks"]
151+
152+
if llm_params["model"] not in known_models:
153+
raise ValueError(f"Model '{llm_params['model']}' is not supported")
154+
155+
try:
156+
if "fireworks" in llm_params["model"]:
157+
model_name = "/".join(llm_params["model"].split("/")[1:])
158+
token_key = llm_params["model"].split("/")[-1]
159+
return handle_model(model_name, "fireworks", token_key)
160+
161+
elif "gemini" in llm_params["model"]:
162+
model_name = llm_params["model"].split("/")[-1]
163+
return handle_model(model_name, "google_genai", model_name)
164+
165+
elif llm_params["model"].startswith("claude"):
166+
model_name = llm_params["model"].split("/")[-1]
167+
return handle_model(model_name, "anthropic", model_name)
168+
169+
elif llm_params["model"].startswith("vertexai"):
170+
return handle_model(llm_params["model"], "google_vertexai", llm_params["model"])
171+
172+
elif "gpt-" in llm_params["model"]:
173+
return handle_model(llm_params["model"], "openai", llm_params["model"])
174+
175+
elif "ollama" in llm_params["model"]:
176+
model_name = llm_params["model"].split("ollama/")[-1]
177+
token_key = model_name if "model_tokens" not in llm_params else llm_params["model_tokens"]
178+
return handle_model(model_name, "ollama", token_key)
179+
180+
elif "claude-3-" in llm_params["model"]:
181+
return handle_model(llm_params["model"], "anthropic", "claude3")
182+
183+
elif llm_params["model"].startswith("mistral"):
184+
model_name = llm_params["model"].split("/")[-1]
185+
return handle_model(model_name, "mistralai", model_name)
186+
187+
# Instantiate the language model based on the model name (models that do not use the common interface)
188+
elif "deepseek" in llm_params["model"]:
189+
try:
190+
self.model_token = models_tokens["deepseek"][llm_params["model"]]
191+
except KeyError:
192+
print("model not found, using default token size (8192)")
193+
self.model_token = 8192
194+
return DeepSeek(llm_params)
195+
196+
elif "ernie" in llm_params["model"]:
197+
try:
198+
self.model_token = models_tokens["ernie"][llm_params["model"]]
199+
except KeyError:
200+
print("model not found, using default token size (8192)")
201+
self.model_token = 8192
202+
return ErnieBotChat(llm_params)
203+
204+
elif "oneapi" in llm_params["model"]:
205+
# take the model after the last dash
206+
llm_params["model"] = llm_params["model"].split("/")[-1]
207+
try:
208+
self.model_token = models_tokens["oneapi"][llm_params["model"]]
209+
except KeyError:
210+
raise KeyError("Model not supported")
211+
return OneApi(llm_params)
212+
213+
elif "nvidia" in llm_params["model"]:
214+
try:
215+
self.model_token = models_tokens["nvidia"][llm_params["model"].split("/")[-1]]
216+
llm_params["model"] = "/".join(llm_params["model"].split("/")[1:])
217+
except KeyError:
218+
raise KeyError("Model not supported")
219+
return ChatNVIDIA(llm_params)
220+
221+
else:
222+
model_name = llm_params["model"].split("/")[-1]
223+
return handle_model(model_name, llm_params["model"], model_name)
224+
225+
except KeyError as e:
226+
print(f"Model not supported: {e}")
233227

234228

235229
def get_state(self, key=None) -> dict:
@@ -277,4 +271,4 @@ def _create_graph(self):
277271
def run(self) -> str:
278272
"""
279273
Abstract method to execute the graph and return the result.
280-
"""
274+
"""

scrapegraphai/helpers/models_tokens.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
"gpt-4-32k": 32768,
1717
"gpt-4-32k-0613": 32768,
1818
"gpt-4o": 128000,
19+
"gpt-4o-2024-08-06": 128000,
20+
"gpt-4o-2024-05-13": 128000,
1921
"gpt-4o-mini":128000,
2022

2123
},

0 commit comments

Comments
 (0)