ScrapeGraphAI
diff --git a/‎examples/anthropic/rate_limit_haiku.py
Lines changed: 48 additions & 0 deletions b/‎examples/anthropic/rate_limit_haiku.py
Lines changed: 48 additions & 0 deletions
diff --git a/‎examples/azure/rate_limit_azure.py
Lines changed: 57 additions & 0 deletions b/‎examples/azure/rate_limit_azure.py
Lines changed: 57 additions & 0 deletions
diff --git a/‎examples/bedrock/rate_limit_bedrock.py
Lines changed: 47 additions & 0 deletions b/‎examples/bedrock/rate_limit_bedrock.py
Lines changed: 47 additions & 0 deletions
diff --git a/‎examples/deepseek/rate_limit_deepseek.py
Lines changed: 49 additions & 0 deletions b/‎examples/deepseek/rate_limit_deepseek.py
Lines changed: 49 additions & 0 deletions
diff --git a/‎examples/ernie/rate_limit_ernie.py
Lines changed: 49 additions & 0 deletions b/‎examples/ernie/rate_limit_ernie.py
Lines changed: 49 additions & 0 deletions
diff --git a/‎examples/fireworks/rate_limit_fireworks.py
Lines changed: 50 additions & 0 deletions b/‎examples/fireworks/rate_limit_fireworks.py
Lines changed: 50 additions & 0 deletions
diff --git a/‎examples/google_genai/rate_limit_gemini.py
Lines changed: 47 additions & 0 deletions b/‎examples/google_genai/rate_limit_gemini.py
Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,48 @@
+""" 
+Basic example of scraping pipeline using SmartScraper while setting an API rate limit.
+"""
+
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.utils import prettify_exec_info
+
+
+# environment variable expected in .env (read below with os.getenv):
+# ANTHROPIC_API_KEY
+load_dotenv()
+
+# ************************************************
+# Configure the LLM, then create the SmartScraperGraph instance and run it
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "api_key": os.getenv("ANTHROPIC_API_KEY"),  # os.getenv yields None when the variable is unset
+        "model": "anthropic/claude-3-haiku-20240307",
+        "rate_limit": {  # the API rate limit this example demonstrates
+            "requests_per_second": 1
+        }
+    },
+}
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="""Don't say anything else. Output JSON only. List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time, 
+    event_end_date, event_end_time, location, event_mode, event_category, 
+    third_party_redirect, no_of_days, 
+    time_in_hours, hosted_or_attending, refreshments_type, 
+    registration_available, registration_link""",
+    # source is a URL here; it also accepts a string with the already downloaded HTML code
+    source="https://www.hmhco.com/event",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Print the graph execution info, formatted by prettify_exec_info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
@@ -0,0 +1,57 @@
+""" 
+Basic example of scraping pipeline using SmartScraper with a custom rate limit
+"""
+
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.utils import prettify_exec_info
+
+
+# required environment variable in .env
+# AZURE_OPENAI_ENDPOINT
+# AZURE_OPENAI_CHAT_DEPLOYMENT_NAME
+# MODEL_NAME
+# AZURE_OPENAI_API_KEY
+# OPENAI_API_TYPE
+# AZURE_OPENAI_API_VERSION
+# AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME
+load_dotenv()
+
+
+# ************************************************
+# Initialize the model instances
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "api_key": os.environ["AZURE_OPENAI_KEY"],
+        "model": "azure_openai/gpt-3.5-turbo",
+        "rate_limit": {
+            "requests_per_second": 1
+        },
+    },
+    "verbose": True,
+    "headless": False
+}
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="""List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time, 
+    event_end_date, event_end_time, location, event_mode, event_category, 
+    third_party_redirect, no_of_days, 
+    time_in_hours, hosted_or_attending, refreshments_type, 
+    registration_available, registration_link""",
+    # also accepts a string with the already downloaded HTML code
+    source="https://www.hmhco.com/event",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
@@ -0,0 +1,47 @@
+""" 
+Basic example of scraping pipeline using SmartScraper with a custom rate limit
+"""
+
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.utils import prettify_exec_info
+
+load_dotenv()
+
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "client": "client_name",  # NOTE(review): looks like a placeholder value - confirm what should be supplied here
+        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+        "temperature": 0.0,
+        "rate_limit": {  # the custom rate limit this example demonstrates
+            "requests_per_second": 1
+        },
+    }
+}
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me all the projects with their description",
+    # source is a URL here; it also accepts a string with the already downloaded HTML code
+    source="https://perinim.github.io/projects/",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Print the graph execution info, formatted by prettify_exec_info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
@@ -0,0 +1,49 @@
+""" 
+Basic example of scraping pipeline using SmartScraper with a custom rate limit
+"""
+
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.utils import prettify_exec_info
+
+load_dotenv()
+
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+deepseek_key = os.getenv("DEEPSEEK_APIKEY")  # os.getenv yields None when the variable is unset
+
+graph_config = {
+    "llm": {
+        "model": "deepseek/deepseek-chat",
+        "api_key": deepseek_key,
+        "rate_limit": {  # the custom rate limit this example demonstrates
+            "requests_per_second": 1
+        }
+    },
+    "verbose": True,
+}
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me all the projects with their description.",
+    # source is a URL here; it also accepts a string with the already downloaded HTML code
+    source="https://perinim.github.io/projects/",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Print the graph execution info, formatted by prettify_exec_info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
@@ -0,0 +1,49 @@
+""" 
+Basic example of scraping pipeline using SmartScraper with a custom rate limit
+"""
+
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.utils import prettify_exec_info
+
+load_dotenv()
+
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {  # "llm" settings plus the top-level "library" option
+    "llm": {
+        "model": "ernie/ernie-bot-turbo",
+        "ernie_client_id": "<ernie_client_id>",  # placeholder text; presumably replaced with a real client id
+        "ernie_client_secret": "<ernie_client_secret>",  # placeholder text; presumably replaced with a real secret
+        "temperature": 0.1,
+        "rate_limit": {  # the custom rate limit this example demonstrates
+            "requests_per_second": 1
+        },
+    },
+    "library": "beautifulsoup"
+}
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me all the projects with their description",
+    # source is a URL here; it also accepts a string with the already downloaded HTML code
+    source="https://perinim.github.io/projects/",
+    config=graph_config,
+)
+
+result = smart_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Print the graph execution info, formatted by prettify_exec_info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
@@ -0,0 +1,50 @@
+""" 
+Basic example of scraping pipeline using SmartScraper with a custom rate limit
+"""
+
+import os, json
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.utils import prettify_exec_info
+
+load_dotenv()
+
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+fireworks_api_key = os.getenv("FIREWORKS_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": fireworks_api_key,
+        "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct",
+        "rate_limit": {
+            "requests_per_second": 1
+        },
+    },
+    "verbose": True,
+    "headless": False,
+}
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me all the projects with their description",
+    # also accepts a string with the already downloaded HTML code
+    source="https://perinim.github.io/projects/",
+    config=graph_config,
+)
+
+result = smart_scraper_graph.run()
+print(json.dumps(result, indent=4))
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
@@ -0,0 +1,47 @@
+""" 
+Basic example of scraping pipeline using SmartScraper with a custom rate limit
+"""
+
+import os
+from dotenv import load_dotenv
+from scrapegraphai.utils import prettify_exec_info
+from scrapegraphai.graphs import SmartScraperGraph
+load_dotenv()
+
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+gemini_key = os.getenv("GOOGLE_APIKEY")  # os.getenv yields None when the variable is unset
+
+graph_config = {
+    "llm": {
+        "api_key": gemini_key,
+        "model": "google_genai/gemini-pro",
+        "rate_limit": {  # the custom rate limit this example demonstrates
+            "requests_per_second": 1
+        }
+    },
+}
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me all the news with their description.",
+    # source is a URL here; it also accepts a string with the already downloaded HTML code
+    source="https://www.wired.com",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Print the graph execution info, formatted by prettify_exec_info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))