ScrapeGraphAI · VinciGit00 · Aug 7, 2024 · Aug 6, 2024 · Aug 6, 2024 · Aug 6, 2024
diff --git a/examples/mistral/.env.example b/examples/mistral/.env.example
@@ -0,0 +1 @@
+MISTRAL_API_KEY="YOUR MISTRAL API KEY"
diff --git a/examples/mistral/csv_scraper_graph_multi_mistral.py b/examples/mistral/csv_scraper_graph_multi_mistral.py
@@ -0,0 +1,56 @@
+"""
+Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
+"""
+
+import os
+from dotenv import load_dotenv
+import pandas as pd
+from scrapegraphai.graphs import CSVScraperMultiGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+load_dotenv()
+# ************************************************
+# Read the CSV file
+# ************************************************
+
+FILE_NAME = "inputs/username.csv"
+curr_dir = os.path.dirname(os.path.realpath(__file__))
+file_path = os.path.join(curr_dir, FILE_NAME)
+
+text = pd.read_csv(file_path)
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+mistral_key = os.getenv("MISTRAL_API_KEY")
+
+graph_config = {
+     "llm": {
+        "api_key": mistral_key,
+        "model": "mistral/open-mistral-nemo",
+    },
+}
+
+# ************************************************
+# Create the CSVScraperMultiGraph instance and run it
+# ************************************************
+
+csv_scraper_graph = CSVScraperMultiGraph(
+    prompt="List me all the last names",
+    source=[str(text), str(text)],
+    config=graph_config
+)
+
+result = csv_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = csv_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json or csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
diff --git a/examples/mistral/csv_scraper_mistral.py b/examples/mistral/csv_scraper_mistral.py
@@ -0,0 +1,57 @@
+"""
+Basic example of scraping pipeline using CSVScraperGraph from CSV documents
+"""
+
+import os
+from dotenv import load_dotenv
+import pandas as pd
+from scrapegraphai.graphs import CSVScraperGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+load_dotenv()
+
+# ************************************************
+# Read the CSV file
+# ************************************************
+
+FILE_NAME = "inputs/username.csv"
+curr_dir = os.path.dirname(os.path.realpath(__file__))
+file_path = os.path.join(curr_dir, FILE_NAME)
+
+text = pd.read_csv(file_path)
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+mistral_key = os.getenv("MISTRAL_API_KEY")
+
+graph_config = {
+    "llm": {
+        "api_key": mistral_key,
+        "model": "mistral/open-mistral-nemo",
+    },
+}
+
+# ************************************************
+# Create the CSVScraperGraph instance and run it
+# ************************************************
+
+csv_scraper_graph = CSVScraperGraph(
+    prompt="List me all the last names",
+    source=str(text),  # Pass the content of the file, not the file object
+    config=graph_config
+)
+
+result = csv_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = csv_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json or csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
diff --git a/examples/mistral/custom_graph_mistral.py b/examples/mistral/custom_graph_mistral.py
@@ -0,0 +1,109 @@
+"""
+Example of custom graph using existing nodes
+"""
+
+import os
+from dotenv import load_dotenv
+
+from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings
+from scrapegraphai.graphs import BaseGraph
+from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
+load_dotenv()
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+mistral_key = os.getenv("MISTRAL_API_KEY")
+graph_config = {
+     "llm": {
+        "api_key": mistral_key,
+        "model": "mistral/open-mistral-nemo",
+    },
+}
+
+# ************************************************
+# Define the graph nodes
+# ************************************************
+
+llm_model = ChatMistralAI(**graph_config["llm"])
+embedder = MistralAIEmbeddings(api_key=llm_model.mistral_api_key)
+
+# define the nodes for the graph
+robot_node = RobotsNode(
+    input="url",
+    output=["is_scrapable"],
+    node_config={
+        "llm_model": llm_model,
+        "force_scraping": True,
+        "verbose": True,
+        }
+)
+
+fetch_node = FetchNode(
+    input="url | local_dir",
+    output=["doc", "link_urls", "img_urls"],
+    node_config={
+        "verbose": True,
+        "headless": True,
+    }
+)
+parse_node = ParseNode(
+    input="doc",
+    output=["parsed_doc"],
+    node_config={
+        "chunk_size": 4096,
+        "verbose": True,
+    }
+)
+rag_node = RAGNode(
+    input="user_prompt & (parsed_doc | doc)",
+    output=["relevant_chunks"],
+    node_config={
+        "llm_model": llm_model,
+        "embedder_model": embedder,
+        "verbose": True,
+    }
+)
+generate_answer_node = GenerateAnswerNode(
+    input="user_prompt & (relevant_chunks | parsed_doc | doc)",
+    output=["answer"],
+    node_config={
+        "llm_model": llm_model,
+        "verbose": True,
+    }
+)
+
+# ************************************************
+# Create the graph by defining the connections
+# ************************************************
+
+graph = BaseGraph(
+    nodes=[
+        robot_node,
+        fetch_node,
+        parse_node,
+        rag_node,
+        generate_answer_node,
+    ],
+    edges=[
+        (robot_node, fetch_node),
+        (fetch_node, parse_node),
+        (parse_node, rag_node),
+        (rag_node, generate_answer_node)
+    ],
+    entry_point=robot_node
+)
+
+# ************************************************
+# Execute the graph
+# ************************************************
+
+result, execution_info = graph.execute({
+    "user_prompt": "Describe the content",
+    "url": "https://example.com/"
+})
+
+# get the answer from the result
+result = result.get("answer", "No answer found.")
+print(result)
diff --git a/examples/mistral/deep_scraper_mistral.py b/examples/mistral/deep_scraper_mistral.py
@@ -0,0 +1,47 @@
+""" 
+Basic example of scraping pipeline using SmartScraper
+"""
+
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import DeepScraperGraph
+from scrapegraphai.utils import prettify_exec_info
+
+load_dotenv()
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+mistral_key = os.getenv("MISTRAL_API_KEY")
+
+graph_config = {
+    "llm": {
+        "api_key": mistral_key,
+        "model": "mistral/open-mistral-nemo",
+    },
+    "verbose": True,
+    "max_depth": 1
+}
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+deep_scraper_graph = DeepScraperGraph(
+    prompt="List me all the job titles and detailed job description.",
+    # also accepts a string with the already downloaded HTML code
+    source="https://www.google.com/about/careers/applications/jobs/results/?location=Bangalore%20India",
+    config=graph_config
+)
+
+result = deep_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = deep_scraper_graph.get_execution_info()
+print(deep_scraper_graph.get_state("relevant_links"))
+print(prettify_exec_info(graph_exec_info))
diff --git a/examples/mistral/inputs/books.xml b/examples/mistral/inputs/books.xml
@@ -0,0 +1,120 @@
+<?xml version="1.0"?>
+<catalog>
+   <book id="bk101">
+      <author>Gambardella, Matthew</author>
+      <title>XML Developer's Guide</title>
+      <genre>Computer</genre>
+      <price>44.95</price>
+      <publish_date>2000-10-01</publish_date>
+      <description>An in-depth look at creating applications 
+      with XML.</description>
+   </book>
+   <book id="bk102">
+      <author>Ralls, Kim</author>
+      <title>Midnight Rain</title>
+      <genre>Fantasy</genre>
+      <price>5.95</price>
+      <publish_date>2000-12-16</publish_date>
+      <description>A former architect battles corporate zombies, 
+      an evil sorceress, and her own childhood to become queen 
+      of the world.</description>
+   </book>
+   <book id="bk103">
+      <author>Corets, Eva</author>
+      <title>Maeve Ascendant</title>
+      <genre>Fantasy</genre>
+      <price>5.95</price>
+      <publish_date>2000-11-17</publish_date>
+      <description>After the collapse of a nanotechnology 
+      society in England, the young survivors lay the 
+      foundation for a new society.</description>
+   </book>
+   <book id="bk104">
+      <author>Corets, Eva</author>
+      <title>Oberon's Legacy</title>
+      <genre>Fantasy</genre>
+      <price>5.95</price>
+      <publish_date>2001-03-10</publish_date>
+      <description>In post-apocalypse England, the mysterious 
+      agent known only as Oberon helps to create a new life 
+      for the inhabitants of London. Sequel to Maeve 
+      Ascendant.</description>
+   </book>
+   <book id="bk105">
+      <author>Corets, Eva</author>
+      <title>The Sundered Grail</title>
+      <genre>Fantasy</genre>
+      <price>5.95</price>
+      <publish_date>2001-09-10</publish_date>
+      <description>The two daughters of Maeve, half-sisters, 
+      battle one another for control of England. Sequel to 
+      Oberon's Legacy.</description>
+   </book>
+   <book id="bk106">
+      <author>Randall, Cynthia</author>
+      <title>Lover Birds</title>
+      <genre>Romance</genre>
+      <price>4.95</price>
+      <publish_date>2000-09-02</publish_date>
+      <description>When Carla meets Paul at an ornithology 
+      conference, tempers fly as feathers get ruffled.</description>
+   </book>
+   <book id="bk107">
+      <author>Thurman, Paula</author>
+      <title>Splish Splash</title>
+      <genre>Romance</genre>
+      <price>4.95</price>
+      <publish_date>2000-11-02</publish_date>
+      <description>A deep sea diver finds true love twenty 
+      thousand leagues beneath the sea.</description>
+   </book>
+   <book id="bk108">
+      <author>Knorr, Stefan</author>
+      <title>Creepy Crawlies</title>
+      <genre>Horror</genre>
+      <price>4.95</price>
+      <publish_date>2000-12-06</publish_date>
+      <description>An anthology of horror stories about roaches,
+      centipedes, scorpions  and other insects.</description>
+   </book>
+   <book id="bk109">
+      <author>Kress, Peter</author>
+      <title>Paradox Lost</title>
+      <genre>Science Fiction</genre>
+      <price>6.95</price>
+      <publish_date>2000-11-02</publish_date>
+      <description>After an inadvertant trip through a Heisenberg
+      Uncertainty Device, James Salway discovers the problems 
+      of being quantum.</description>
+   </book>
+   <book id="bk110">
+      <author>O'Brien, Tim</author>
+      <title>Microsoft .NET: The Programming Bible</title>
+      <genre>Computer</genre>
+      <price>36.95</price>
+      <publish_date>2000-12-09</publish_date>
+      <description>Microsoft's .NET initiative is explored in 
+      detail in this deep programmer's reference.</description>
+   </book>
+   <book id="bk111">
+      <author>O'Brien, Tim</author>
+      <title>MSXML3: A Comprehensive Guide</title>
+      <genre>Computer</genre>
+      <price>36.95</price>
+      <publish_date>2000-12-01</publish_date>
+      <description>The Microsoft MSXML3 parser is covered in 
+      detail, with attention to XML DOM interfaces, XSLT processing, 
+      SAX and more.</description>
+   </book>
+   <book id="bk112">
+      <author>Galos, Mike</author>
+      <title>Visual Studio 7: A Comprehensive Guide</title>
+      <genre>Computer</genre>
+      <price>49.95</price>
+      <publish_date>2001-04-16</publish_date>
+      <description>Microsoft Visual Studio 7 is explored in depth,
+      looking at how Visual Basic, Visual C++, C#, and ASP+ are 
+      integrated into a comprehensive development 
+      environment.</description>
+   </book>
+</catalog>