Skip to content

Commit 2859fb7

Browse files
committed
feat(AbstractGraph): add adjustable rate limit
1 parent 81af62d commit 2859fb7

File tree

16 files changed

+699
-0
lines changed

16 files changed

+699
-0
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
"""
2+
Basic example of scraping pipeline using SmartScraper while setting an API rate limit.
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import SmartScraperGraph
8+
from scrapegraphai.utils import prettify_exec_info
9+
10+
11+
# Environment variables expected in .env:
#   ANTHROPIC_API_KEY
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

# The rate limit is declared inside the "llm" section, next to the model settings.
llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
    "rate_limit": {"requests_per_second": 1},
}
graph_config = {"llm": llm_settings}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

events_prompt = """Don't say anything else. Output JSON only. List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
event_end_date, event_end_time, location, event_mode, event_category,
third_party_redirect, no_of_days,
time_in_hours, hosted_or_attending, refreshments_type,
registration_available, registration_link"""

smart_scraper_graph = SmartScraperGraph(
    prompt=events_prompt,
    # a string containing already-downloaded HTML is accepted here as well
    source="https://www.hmhco.com/event",
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

examples/azure/rate_limit_azure.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
"""
2+
Basic example of scraping pipeline using SmartScraper with a custom rate limit
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import SmartScraperGraph
8+
from scrapegraphai.utils import prettify_exec_info
9+
10+
11+
# required environment variables in .env
# AZURE_OPENAI_ENDPOINT
# AZURE_OPENAI_CHAT_DEPLOYMENT_NAME
# MODEL_NAME
# AZURE_OPENAI_API_KEY
# OPENAI_API_TYPE
# AZURE_OPENAI_API_VERSION
# AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME
load_dotenv()


# ************************************************
# Define the configuration for the graph
# ************************************************

# Read the key under the name documented in the list above. This script
# previously read AZURE_OPENAI_KEY, so that name is kept as a fallback for
# existing .env files; if neither is set the KeyError is raised as before.
try:
    azure_api_key = os.environ["AZURE_OPENAI_API_KEY"]
except KeyError:
    azure_api_key = os.environ["AZURE_OPENAI_KEY"]

graph_config = {
    "llm": {
        "api_key": azure_api_key,
        "model": "azure_openai/gpt-3.5-turbo",
        # custom rate limit for the LLM API
        "rate_limit": {
            "requests_per_second": 1
        },
    },
    "verbose": True,
    "headless": False
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
    prompt="""List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
event_end_date, event_end_time, location, event_mode, event_category,
third_party_redirect, no_of_days,
time_in_hours, hosted_or_attending, refreshments_type,
registration_available, registration_link""",
    # also accepts a string with the already downloaded HTML code
    source="https://www.hmhco.com/event",
    config=graph_config
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
"""
2+
Basic example of scraping pipeline using SmartScraper with a custom rate limit
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import SmartScraperGraph
8+
from scrapegraphai.utils import prettify_exec_info
9+
10+
load_dotenv()


# ************************************************
# Define the configuration for the graph
# ************************************************

# The rate limit is declared inside the "llm" section, next to the model settings.
bedrock_llm = {
    # NOTE(review): "client_name" looks like a placeholder value - confirm
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
    "rate_limit": {"requests_per_second": 1},
}
graph_config = {"llm": bedrock_llm}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

projects_prompt = "List me all the projects with their description"
# a string containing already-downloaded HTML is accepted as the source too
projects_source = "https://perinim.github.io/projects/"

smart_scraper_graph = SmartScraperGraph(
    prompt=projects_prompt,
    source=projects_source,
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
"""
2+
Basic example of scraping pipeline using SmartScraper with a custom rate limit
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import SmartScraperGraph
8+
from scrapegraphai.utils import prettify_exec_info
9+
10+
load_dotenv()


# ************************************************
# Define the configuration for the graph
# ************************************************

# API key is taken from the DEEPSEEK_APIKEY environment variable.
deepseek_key = os.getenv("DEEPSEEK_APIKEY")

# The rate limit is declared inside the "llm" section, next to the model settings.
deepseek_llm = {
    "model": "deepseek/deepseek-chat",
    "api_key": deepseek_key,
    "rate_limit": {"requests_per_second": 1},
}
graph_config = {"llm": deepseek_llm, "verbose": True}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

projects_prompt = "List me all the projects with their description."
# a string containing already-downloaded HTML is accepted as the source too
projects_source = "https://perinim.github.io/projects/"

smart_scraper_graph = SmartScraperGraph(
    prompt=projects_prompt,
    source=projects_source,
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

examples/ernie/rate_limit_ernie.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
"""
2+
Basic example of scraping pipeline using SmartScraper with a custom rate limit
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import SmartScraperGraph
8+
from scrapegraphai.utils import prettify_exec_info
9+
10+
load_dotenv()


# ************************************************
# Define the configuration for the graph
# ************************************************

# The rate limit is declared inside the "llm" section, next to the model settings.
ernie_llm = {
    "model": "ernie/ernie-bot-turbo",
    "ernie_client_id": "<ernie_client_id>",
    "ernie_client_secret": "<ernie_client_secret>",
    "temperature": 0.1,
    "rate_limit": {"requests_per_second": 1},
}
graph_config = {"llm": ernie_llm, "library": "beautifulsoup"}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

projects_prompt = "List me all the projects with their description"
# a string containing already-downloaded HTML is accepted as the source too
projects_source = "https://perinim.github.io/projects/"

smart_scraper_graph = SmartScraperGraph(
    prompt=projects_prompt,
    source=projects_source,
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""
2+
Basic example of scraping pipeline using SmartScraper with a custom rate limit
3+
"""
4+
5+
import json
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()


# ************************************************
# Define the configuration for the graph
# ************************************************

# API key is taken from the FIREWORKS_APIKEY environment variable.
fireworks_api_key = os.getenv("FIREWORKS_APIKEY")

graph_config = {
    "llm": {
        "api_key": fireworks_api_key,
        "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct",
        # custom rate limit for the LLM API
        "rate_limit": {
            "requests_per_second": 1
        },
    },
    "verbose": True,
    "headless": False,
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description",
    # also accepts a string with the already downloaded HTML code
    source="https://perinim.github.io/projects/",
    config=graph_config,
)

result = smart_scraper_graph.run()
# pretty-print the result as indented JSON
print(json.dumps(result, indent=4))

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
"""
2+
Basic example of scraping pipeline using SmartScraper with a custom rate limit
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
from scrapegraphai.utils import prettify_exec_info
8+
from scrapegraphai.graphs import SmartScraperGraph
9+
load_dotenv()


# ************************************************
# Define the configuration for the graph
# ************************************************

# API key is taken from the GOOGLE_APIKEY environment variable.
gemini_key = os.getenv("GOOGLE_APIKEY")

# The rate limit is declared inside the "llm" section, next to the model settings.
gemini_llm = {
    "api_key": gemini_key,
    "model": "google_genai/gemini-pro",
    "rate_limit": {"requests_per_second": 1},
}
graph_config = {"llm": gemini_llm}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

news_prompt = "List me all the news with their description."
# a string containing already-downloaded HTML is accepted as the source too
news_source = "https://www.wired.com"

smart_scraper_graph = SmartScraperGraph(
    prompt=news_prompt,
    source=news_source,
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

0 commit comments

Comments
 (0)