ScrapeGraphAI
diff --git a/‎CHANGELOG.md
Lines changed: 12 additions & 1 deletion b/‎CHANGELOG.md
Lines changed: 12 additions & 1 deletion
diff --git a/‎README.md
Lines changed: 22 additions & 0 deletions b/‎README.md
Lines changed: 22 additions & 0 deletions
diff --git a/‎docs/README.md
Lines changed: 0 additions & 11 deletions b/‎docs/README.md
Lines changed: 0 additions & 11 deletions
diff --git a/‎docs/source/scrapers/llm.rst
Lines changed: 32 additions & 0 deletions b/‎docs/source/scrapers/llm.rst
Lines changed: 32 additions & 0 deletions
diff --git a/‎examples/model_instance/.env.example
Lines changed: 1 addition & 0 deletions b/‎examples/model_instance/.env.example
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/model_instance/smart_scraper_with_model_instace.py
Lines changed: 53 additions & 0 deletions b/‎examples/model_instance/smart_scraper_with_model_instace.py
Lines changed: 53 additions & 0 deletions
diff --git a/‎examples/moonshot/.env.example
Lines changed: 1 addition & 0 deletions b/‎examples/moonshot/.env.example
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/moonshot/readme.md
Lines changed: 1 addition & 0 deletions b/‎examples/moonshot/readme.md
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/moonshot/smart_scraper_with_moonshot.py
Lines changed: 53 additions & 0 deletions b/‎examples/moonshot/smart_scraper_with_moonshot.py
Lines changed: 53 additions & 0 deletions
diff --git a/‎pyproject.toml
Lines changed: 22 additions & 13 deletions b/‎pyproject.toml
Lines changed: 22 additions & 13 deletions
@@ -1,15 +1,26 @@
 ## [1.14.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.14.0...v1.14.1) (2024-08-24)
 
 
+
+### Bug Fixes
+
+
+* update abstract graph ([86fe5fc](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/86fe5fcaf1a6ba28786678874378f07fba1db40f))
+
+## [1.15.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.0-beta.1...v1.15.0-beta.2) (2024-08-23)
+
+
 ### Bug Fixes
 
-* add claude3.5 sonnet ([ee8f8b3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ee8f8b31ecfe4ffd311528d2f48cb055e4609d99))
+* abstract graph ([cf1fada](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/cf1fada36a6716cb0e24bbc5da7509446a964145))
 
 
 ### Docs
 
 * added sponsors ([b3a2d0d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/b3a2d0d65a41f6e645fac3fc84f702fdf64b951c))
 
+
+
 ## [1.14.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.3...v1.14.0) (2024-08-20)
 
 
 
@@ -32,6 +32,28 @@ playwright install
 
 **Note**: it is recommended to install the library in a virtual environment to avoid conflicts with other libraries 🐱
 
+Optional modules are not installed by default. If you want to use them, install the corresponding extra with one of the following commands:
+
+### Installing "Other Language Models"
+
+This group allows you to use additional language models like Fireworks, Groq, Anthropic, Hugging Face, and Nvidia AI Endpoints.
+```bash
+pip install scrapegraphai[other-language-models]
+```
+
+### Installing "More Semantic Options"
+
+This group includes tools for advanced semantic processing, such as Graphviz.
+```bash
+pip install scrapegraphai[more-semantic-options]
+```
+### Installing "More Browser Options"
+
+This group includes additional browser management options, such as BrowserBase.
+```bash
+pip install scrapegraphai[more-browser-options]
+```
+
 ## 💻 Usage
 There are multiple standard scraping pipelines that can be used to extract information from a website (or local file).
 
 
@@ -9,12 +9,6 @@ markmap:
 
 ## **Short-Term Goals**
 
-- Integration with more llm APIs
-
-- Test proxy rotation implementation
-
-- Add more search engines inside the SearchInternetNode
-
 - Improve the documentation (ReadTheDocs)
     - [Issue #102](https://github.com/VinciGit00/Scrapegraph-ai/issues/102)
 
@@ -23,9 +17,6 @@ markmap:
 ## **Medium-Term Goals**
 
 - Node for handling API requests
-
-- Improve SearchGraph to look into the first 5 results of the search engine
-
 - Make scraping more deterministic
     - Create DOM tree of the website
     - HTML tag text embeddings with tags metadata
@@ -70,5 +61,3 @@ markmap:
 - Automatic generation of scraping pipelines from a given prompt
 
 - Create API for the library
-
-- Finetune a LLM for html content
@@ -194,3 +194,35 @@ We can also pass a model instance for the chat model and the embedding model. Fo
             "model_instance": embedder_model_instance
         }
     }
+
+Other LLM models
+^^^^^^^^^^^^^^^^
+
+We can also pass a model instance for the chat model and the embedding model through the **model_instance** parameter.
+This feature lets you use a LangChain model instance.
+You can find the model you need in the following lists:
+
+- `chat model list <https://python.langchain.com/v0.2/docs/integrations/chat/#all-chat-models>`_
+- `embedding model list <https://python.langchain.com/v0.2/docs/integrations/text_embedding/#all-embedding-models>`_
+
+For instance, consider the Moonshot **chat model**. We can integrate it in the following manner:
+
+.. code-block:: python
+    
+    from langchain_community.chat_models.moonshot import MoonshotChat
+
+    # The configuration parameters are contingent upon the specific model you select
+    llm_instance_config = {
+        "model": "moonshot-v1-8k",
+        "base_url": "https://api.moonshot.cn/v1",
+        "moonshot_api_key": "MOONSHOT_API_KEY",
+    }
+
+    llm_model_instance = MoonshotChat(**llm_instance_config)
+    graph_config = {
+        "llm": {
+            "model_instance": llm_model_instance, 
+            "model_tokens": 5000
+        },
+    }
+    
@@ -0,0 +1 @@
+MOONLIGHT_API_KEY="YOUR MOONLIGHT API KEY"
@@ -0,0 +1,53 @@
+""" 
+Basic example of scraping pipeline using SmartScraper and model_instance
+"""
+
+import os, json
+from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.utils import prettify_exec_info
+from langchain_community.chat_models.moonshot import MoonshotChat
+from dotenv import load_dotenv
+load_dotenv()
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+
+llm_instance_config = {
+    "model": "moonshot-v1-8k",
+    "base_url": "https://api.moonshot.cn/v1",
+    "moonshot_api_key": os.getenv("MOONLIGHT_API_KEY"),
+}
+
+
+llm_model_instance = MoonshotChat(**llm_instance_config)
+
+graph_config = {
+    "llm": {
+        "model_instance": llm_model_instance, 
+        "model_tokens": 10000
+    },
+    "verbose": True,
+    "headless": True,
+}
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me what does the company do, the name and a contact email.",
+    source="https://scrapegraphai.com/",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(json.dumps(result, indent=4))
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
@@ -0,0 +1 @@
+MOONLIGHT_API_KEY="YOUR MOONLIGHT API KEY"
@@ -0,0 +1 @@
+This folder offers an example of how to use ScrapeGraph-AI with Moonshot and SmartScraperGraph. For more usage examples, refer to the OpenAI examples.
@@ -0,0 +1,53 @@
+""" 
+Basic example of scraping pipeline using SmartScraper and model_instance
+"""
+
+import os, json
+from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.utils import prettify_exec_info
+from langchain_community.chat_models.moonshot import MoonshotChat
+from dotenv import load_dotenv
+load_dotenv()
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+
+llm_instance_config = {
+    "model": "moonshot-v1-8k",
+    "base_url": "https://api.moonshot.cn/v1",
+    "moonshot_api_key": os.getenv("MOONLIGHT_API_KEY"),
+}
+
+
+llm_model_instance = MoonshotChat(**llm_instance_config)
+
+graph_config = {
+    "llm": {
+        "model_instance": llm_model_instance, 
+        "model_tokens": 10000
+    },
+    "verbose": True,
+    "headless": True,
+}
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me what does the company do, the name and a contact email.",
+    source="https://scrapegraphai.com/",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(json.dumps(result, indent=4))
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
@@ -4,9 +4,7 @@ name = "scrapegraphai"
 
 version = "1.14.1"
 
-
 description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
-
 authors = [
     { name = "Marco Vinciguerra", email = "[email protected]" },
     { name = "Marco Perini", email = "[email protected]" },
@@ -15,32 +13,24 @@ authors = [
 
 dependencies = [
     "langchain>=0.2.14",
-    "langchain-fireworks>=0.1.3",
-    "langchain_community>=0.2.9",
     "langchain-google-genai>=1.0.7",
-    "langchain-google-vertexai>=1.0.7",
     "langchain-openai>=0.1.22",
-    "langchain-groq>=0.1.3",
-    "langchain-aws>=0.1.3",
-    "langchain-anthropic>=0.1.11",
     "langchain-mistralai>=0.1.12",
-    "langchain-huggingface>=0.0.3",
-    "langchain-nvidia-ai-endpoints>=0.1.6",
+    "langchain_community>=0.2.9",
+    "langchain-aws>=0.1.3",
     "html2text>=2024.2.26",
     "faiss-cpu>=1.8.0",
     "beautifulsoup4>=4.12.3",
     "pandas>=2.2.2",
     "python-dotenv>=1.0.1",
     "tiktoken>=0.7",
     "tqdm>=4.66.4",
-    "graphviz>=0.20.3",
     "minify-html>=0.15.0",
     "free-proxy>=1.1.1",
     "playwright>=1.43.0",
-    "google>=3.0.0",
     "undetected-playwright>=0.3.0",
+    "google>=3.0.0",
     "semchunk>=1.0.1",
-    "browserbase>=0.3.0",
 ]
 
 license = "MIT"
@@ -79,6 +69,25 @@ requires-python = ">=3.9,<4.0"
 burr = ["burr[start]==0.22.1"]
 docs = ["sphinx==6.0", "furo==2024.5.6"]
 
+# Group 1: Other Language Models
+other-language-models = [
+    "langchain-fireworks>=0.1.3",
+    "langchain-groq>=0.1.3",
+    "langchain-anthropic>=0.1.11",
+    "langchain-huggingface>=0.0.3",
+    "langchain-nvidia-ai-endpoints>=0.1.6",
+]
+
+# Group 2: More Semantic Options
+more-semantic-options = [
+    "graphviz>=0.20.3",
+]
+
+# Group 3: More Browser Options
+more-browser-options = [
+    "browserbase>=0.3.0",
+]
+
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+MOONLIGHT_API_KEY="YOUR MOONLIGHT API KEY"`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+This folder offers an example of how to use ScrapeGraph-AI with Moonshot and SmartScraperGraph. For more usage examples, refer to the OpenAI examples.`