
Commit 0048ba1

Merge branch 'pre/beta' of https://github.com/ScrapeGraphAI/Scrapegraph-ai into pre/beta
2 parents 71ae384 + 2eba73b commit 0048ba1

40 files changed: +1682 -26 lines changed

CHANGELOG.md

Lines changed: 35 additions & 0 deletions
@@ -1,3 +1,38 @@
## [1.13.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.0-beta.4...v1.13.0-beta.5) (2024-08-08)

### Bug Fixes

* **chunking:** count tokens from words instead of characters ([5ec2de9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5ec2de9e1a14def5596738b6cdf769f5039a246d)), closes [#513](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/513)

## [1.13.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.0-beta.3...v1.13.0-beta.4) (2024-08-07)

### Bug Fixes

* refactoring of merge_answer_node ([898e5a7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/898e5a7af504fbf4c1cabb14103e66184037de49))

## [1.13.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.0-beta.2...v1.13.0-beta.3) (2024-08-07)

### Features

* add mistral support ([17f2707](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/17f2707313f65a1e96443b3c8a1f5137892f2c5a))

### Bug Fixes

* **FetchNode:** handling of missing browser_base key ([07720b6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/07720b6e0ca10ba6ce3c1359706a09baffcc4ad0))
* **AbstractGraph:** LangChain warnings handling, Mistral tokens ([786af99](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/786af992f8fbdadfdc3d2d6a06c0cfd81289f8f2))

### chore

* **models_tokens:** add mistral models ([5e82432](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5e824327c3acb69d53f3519344d0f8c2e3defa8b))
* **mistral:** create examples ([f8ad616](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f8ad616e10c271443e2dcb4123c8ddb91de2ff69))
* **examples:** fix Mistral examples ([b0ffc51](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/b0ffc51e5415caec562a565710f5195afe1fbcb2))
* update requirements for mistral ([9868555](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/986855512319541d1d02356df9ad61ab7fc5d807))

## [1.13.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.0-beta.1...v1.13.0-beta.2) (2024-08-07)
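For orientation, a rough sketch of the idea behind the chunking fix listed in 1.13.0-beta.5 above: estimate tokens from the word count rather than the character count. The helper name and the 1.3 tokens-per-word ratio below are illustrative assumptions, not the library's actual code (see commit 5ec2de9 for that).

def approx_tokens(text: str) -> int:
    # Illustrative only: estimate tokens from the number of words rather than
    # from len(text); the 1.3 tokens-per-word ratio is an assumed rule of thumb,
    # not the value used in Scrapegraph-ai.
    return int(len(text.split()) * 1.3)

chunk = "count tokens from words instead of characters"
print(approx_tokens(chunk))  # 9, versus len(chunk) == 45 if characters were counted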

examples/mistral/.env.example

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
MISTRAL_API_KEY="YOUR MISTRAL API KEY"
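The Mistral example scripts added in this commit read this key through python-dotenv. A minimal sketch of that wiring, mirroring what the example files below already do (nothing here goes beyond those files):

import os
from dotenv import load_dotenv

load_dotenv()  # loads MISTRAL_API_KEY from the .env file shown above

graph_config = {
    "llm": {
        "api_key": os.getenv("MISTRAL_API_KEY"),
        "model": "mistral/open-mistral-nemo",
    },
}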
Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
"""
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
"""

import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()
# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# ************************************************
# Define the configuration for the graph
# ************************************************
mistral_key = os.getenv("MISTRAL_API_KEY")

graph_config = {
    "llm": {
        "api_key": mistral_key,
        "model": "mistral/open-mistral-nemo",
    },
}

# ************************************************
# Create the CSVScraperMultiGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    source=[str(text), str(text)],
    config=graph_config
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
"""
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
"""

import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# ************************************************
# Define the configuration for the graph
# ************************************************

mistral_key = os.getenv("MISTRAL_API_KEY")

graph_config = {
    "llm": {
        "api_key": mistral_key,
        "model": "mistral/open-mistral-nemo",
    },
}

# ************************************************
# Create the CSVScraperGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperGraph(
    prompt="List me all the last names",
    source=str(text),  # Pass the content of the file, not the file object
    config=graph_config
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@
"""
Example of custom graph using existing nodes
"""

import os
from dotenv import load_dotenv

from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings
from scrapegraphai.graphs import BaseGraph
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

mistral_key = os.getenv("MISTRAL_API_KEY")
graph_config = {
    "llm": {
        "api_key": mistral_key,
        "model": "mistral/open-mistral-nemo",
    },
}

# ************************************************
# Define the graph nodes
# ************************************************

llm_model = ChatMistralAI(**graph_config["llm"])
embedder = MistralAIEmbeddings(api_key=llm_model.mistral_api_key)

# define the nodes for the graph
robot_node = RobotsNode(
    input="url",
    output=["is_scrapable"],
    node_config={
        "llm_model": llm_model,
        "force_scraping": True,
        "verbose": True,
    }
)

fetch_node = FetchNode(
    input="url | local_dir",
    output=["doc", "link_urls", "img_urls"],
    node_config={
        "verbose": True,
        "headless": True,
    }
)
parse_node = ParseNode(
    input="doc",
    output=["parsed_doc"],
    node_config={
        "chunk_size": 4096,
        "verbose": True,
    }
)
rag_node = RAGNode(
    input="user_prompt & (parsed_doc | doc)",
    output=["relevant_chunks"],
    node_config={
        "llm_model": llm_model,
        "embedder_model": embedder,
        "verbose": True,
    }
)
generate_answer_node = GenerateAnswerNode(
    input="user_prompt & (relevant_chunks | parsed_doc | doc)",
    output=["answer"],
    node_config={
        "llm_model": llm_model,
        "verbose": True,
    }
)

# ************************************************
# Create the graph by defining the connections
# ************************************************

graph = BaseGraph(
    nodes=[
        robot_node,
        fetch_node,
        parse_node,
        rag_node,
        generate_answer_node,
    ],
    edges=[
        (robot_node, fetch_node),
        (fetch_node, parse_node),
        (parse_node, rag_node),
        (rag_node, generate_answer_node)
    ],
    entry_point=robot_node
)

# ************************************************
# Execute the graph
# ************************************************

result, execution_info = graph.execute({
    "user_prompt": "Describe the content",
    "url": "https://example.com/"
})

# get the answer from the result
result = result.get("answer", "No answer found.")
print(result)
Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
"""
Basic example of scraping pipeline using DeepScraperGraph
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import DeepScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

mistral_key = os.getenv("MISTRAL_API_KEY")

graph_config = {
    "llm": {
        "api_key": mistral_key,
        "model": "mistral/open-mistral-nemo",
    },
    "verbose": True,
    "max_depth": 1
}

# ************************************************
# Create the DeepScraperGraph instance and run it
# ************************************************

deep_scraper_graph = DeepScraperGraph(
    prompt="List me all the job titles and detailed job description.",
    # also accepts a string with the already downloaded HTML code
    source="https://www.google.com/about/careers/applications/jobs/results/?location=Bangalore%20India",
    config=graph_config
)

result = deep_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = deep_scraper_graph.get_execution_info()
print(deep_scraper_graph.get_state("relevant_links"))
print(prettify_exec_info(graph_exec_info))

0 commit comments
