Skip to content

Commit a10b060

Browse files
authored
Merge pull request #361 from VinciGit00/multi_scraper_implementation
Multi scraper implementation
2 parents 893aadd + 5d692bf commit a10b060

19 files changed

+941
-34
lines changed
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""
2+
Basic example of a scraping pipeline using ScriptCreatorMultiGraph
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import ScriptCreatorMultiGraph
8+
from scrapegraphai.utils import prettify_exec_info
9+
10+
load_dotenv()
11+
12+
# ************************************************
13+
# Define the configuration for the graph
14+
# ************************************************
15+
16+
graph_config = {
17+
"llm": {
18+
"api_key": os.getenv("ANTHROPIC_API_KEY"),
19+
"model": "claude-3-haiku-20240307",
20+
"max_tokens": 4000
21+
},
22+
"library": "beautifulsoup"
23+
}
24+
25+
# ************************************************
26+
# Define the list of URLs to scrape
27+
# ************************************************
28+
29+
urls=[
30+
"https://schultzbergagency.com/emil-raste-karlsen/",
31+
"https://schultzbergagency.com/johanna-hedberg/",
32+
]
33+
34+
# ************************************************
35+
# Create the ScriptCreatorMultiGraph instance and run it
36+
# ************************************************
37+
38+
script_creator_graph = ScriptCreatorMultiGraph(
39+
prompt="Find information about actors",
40+
# also accepts a string with the already downloaded HTML code
41+
source=urls,
42+
config=graph_config
43+
)
44+
45+
result = script_creator_graph.run()
46+
print(result)
47+
48+
# ************************************************
49+
# Get graph execution info
50+
# ************************************************
51+
52+
graph_exec_info = script_creator_graph.get_execution_info()
53+
print(prettify_exec_info(graph_exec_info))

examples/anthropic/smart_scraper_multi_haiku.py

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,31 +12,14 @@
1212
# Define the configuration for the graph
1313
# ************************************************
1414

15-
openai_key = os.getenv("OPENAI_APIKEY")
16-
17-
"""
18-
Basic example of scraping pipeline using SmartScraper
19-
"""
20-
21-
import os, json
22-
from dotenv import load_dotenv
23-
from scrapegraphai.graphs import SmartScraperMultiGraph
24-
2515
load_dotenv()
2616

27-
# ************************************************
28-
# Define the configuration for the graph
29-
# ************************************************
30-
31-
openai_key = os.getenv("OPENAI_APIKEY")
32-
3317
graph_config = {
3418
"llm": {
35-
"api_key": openai_key,
36-
"model": "gpt-4o",
37-
},
38-
"verbose": True,
39-
"headless": False,
19+
"api_key": os.getenv("ANTHROPIC_API_KEY"),
20+
"model": "claude-3-haiku-20240307",
21+
"max_tokens": 4000
22+
},
4023
}
4124

4225
# *******************************************************

examples/azure/script_generator_azure.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
)
2626
graph_config = {
2727
"llm": {"model_instance": llm_model_instance},
28-
"embeddings": {"model_instance": embedder_model_instance}
28+
"embeddings": {"model_instance": embedder_model_instance},
29+
"library": "beautifulsoup"
2930
}
3031

3132
# ************************************************
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
"""
2+
Basic example of a scraping pipeline using ScriptCreatorMultiGraph
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import ScriptCreatorMultiGraph
8+
from scrapegraphai.utils import prettify_exec_info
9+
from langchain_openai import AzureChatOpenAI
10+
from langchain_openai import AzureOpenAIEmbeddings
11+
12+
load_dotenv()
13+
14+
# ************************************************
15+
# Define the configuration for the graph
16+
# ************************************************
17+
llm_model_instance = AzureChatOpenAI(
18+
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
19+
azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"]
20+
)
21+
22+
embedder_model_instance = AzureOpenAIEmbeddings(
23+
azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME"],
24+
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
25+
)
26+
graph_config = {
27+
"llm": {"model_instance": llm_model_instance},
28+
"embeddings": {"model_instance": embedder_model_instance},
29+
"library": "beautifulsoup"
30+
}
31+
32+
33+
# ************************************************
34+
# Define the list of URLs to scrape
35+
# ************************************************
36+
37+
urls=[
38+
"https://schultzbergagency.com/emil-raste-karlsen/",
39+
"https://schultzbergagency.com/johanna-hedberg/",
40+
]
41+
42+
# ************************************************
43+
# Create the ScriptCreatorMultiGraph instance and run it
44+
# ************************************************
45+
46+
script_creator_graph = ScriptCreatorMultiGraph(
47+
prompt="Find information about actors",
48+
# also accepts a string with the already downloaded HTML code
49+
source=urls,
50+
config=graph_config
51+
)
52+
53+
result = script_creator_graph.run()
54+
print(result)
55+
56+
# ************************************************
57+
# Get graph execution info
58+
# ************************************************
59+
60+
graph_exec_info = script_creator_graph.get_execution_info()
61+
print(prettify_exec_info(graph_exec_info))
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
"""
2+
Basic example of a scraping pipeline using ScriptCreatorMultiGraph
3+
"""
4+
5+
from scrapegraphai.graphs import ScriptCreatorMultiGraph
6+
from scrapegraphai.utils import prettify_exec_info
7+
8+
# ************************************************
9+
# Define the configuration for the graph
10+
# ************************************************
11+
12+
graph_config = {
13+
"llm": {
14+
"client": "client_name",
15+
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
16+
"temperature": 0.0
17+
},
18+
"embeddings": {
19+
"model": "bedrock/cohere.embed-multilingual-v3"
20+
},
21+
"library": "beautifulsoup"
22+
}
23+
24+
# ************************************************
25+
# Define the list of URLs to scrape
26+
# ************************************************
27+
28+
urls=[
29+
"https://schultzbergagency.com/emil-raste-karlsen/",
30+
"https://schultzbergagency.com/johanna-hedberg/",
31+
]
32+
33+
# ************************************************
34+
# Create the ScriptCreatorMultiGraph instance and run it
35+
# ************************************************
36+
37+
script_creator_graph = ScriptCreatorMultiGraph(
38+
prompt="Find information about actors",
39+
# also accepts a string with the already downloaded HTML code
40+
source=urls,
41+
config=graph_config
42+
)
43+
44+
result = script_creator_graph.run()
45+
print(result)
46+
47+
# ************************************************
48+
# Get graph execution info
49+
# ************************************************
50+
51+
graph_exec_info = script_creator_graph.get_execution_info()
52+
print(prettify_exec_info(graph_exec_info))
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
"""
2+
Basic example of a scraping pipeline using ScriptCreatorMultiGraph
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import ScriptCreatorMultiGraph
8+
from scrapegraphai.utils import prettify_exec_info
9+
10+
load_dotenv()
11+
12+
# ************************************************
13+
# Define the configuration for the graph
14+
# ************************************************
15+
16+
deepseek_key = os.getenv("DEEPSEEK_APIKEY")
17+
18+
graph_config = {
19+
"llm": {
20+
"model": "deepseek-chat",
21+
"openai_api_key": deepseek_key,
22+
"openai_api_base": 'https://api.deepseek.com/v1',
23+
},
24+
"embeddings": {
25+
"model": "ollama/nomic-embed-text",
26+
"temperature": 0,
27+
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
28+
},
29+
"library": "beautifulsoup"
30+
}
31+
32+
# ************************************************
33+
# Define the list of URLs to scrape
34+
# ************************************************
35+
36+
urls=[
37+
"https://schultzbergagency.com/emil-raste-karlsen/",
38+
"https://schultzbergagency.com/johanna-hedberg/",
39+
]
40+
41+
# ************************************************
42+
# Create the ScriptCreatorMultiGraph instance and run it
43+
# ************************************************
44+
45+
script_creator_graph = ScriptCreatorMultiGraph(
46+
prompt="Find information about actors",
47+
# also accepts a string with the already downloaded HTML code
48+
source=urls,
49+
config=graph_config
50+
)
51+
52+
result = script_creator_graph.run()
53+
print(result)
54+
55+
# ************************************************
56+
# Get graph execution info
57+
# ************************************************
58+
59+
graph_exec_info = script_creator_graph.get_execution_info()
60+
print(prettify_exec_info(graph_exec_info))
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
"""
2+
Basic example of a scraping pipeline using ScriptCreatorMultiGraph
3+
"""
4+
5+
from scrapegraphai.graphs import ScriptCreatorMultiGraph
6+
from scrapegraphai.utils import prettify_exec_info
7+
8+
# ************************************************
9+
# Define the configuration for the graph
10+
# ************************************************
11+
12+
graph_config = {
13+
"llm": {
14+
"model": "ernie-bot-turbo",
15+
"ernie_client_id": "<ernie_client_id>",
16+
"ernie_client_secret": "<ernie_client_secret>",
17+
"temperature": 0.1
18+
},
19+
"embeddings": {
20+
"model": "ollama/nomic-embed-text",
21+
"temperature": 0,
22+
"base_url": "http://localhost:11434"},
23+
"library": "beautifulsoup"
24+
}
25+
26+
# ************************************************
27+
# Define the list of URLs to scrape
28+
# ************************************************
29+
30+
urls=[
31+
"https://schultzbergagency.com/emil-raste-karlsen/",
32+
"https://schultzbergagency.com/johanna-hedberg/",
33+
]
34+
35+
# ************************************************
36+
# Create the ScriptCreatorMultiGraph instance and run it
37+
# ************************************************
38+
39+
script_creator_graph = ScriptCreatorMultiGraph(
40+
prompt="Find information about actors",
41+
# also accepts a string with the already downloaded HTML code
42+
source=urls,
43+
config=graph_config
44+
)
45+
46+
result = script_creator_graph.run()
47+
print(result)
48+
49+
# ************************************************
50+
# Get graph execution info
51+
# ************************************************
52+
53+
graph_exec_info = script_creator_graph.get_execution_info()
54+
print(prettify_exec_info(graph_exec_info))
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
"""
2+
Basic example of a scraping pipeline using ScriptCreatorMultiGraph
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import ScriptCreatorMultiGraph
8+
from scrapegraphai.utils import prettify_exec_info
9+
10+
load_dotenv()
11+
12+
# ************************************************
13+
# Define the configuration for the graph
14+
# ************************************************
15+
16+
gemini_key = os.getenv("GOOGLE_APIKEY")
17+
18+
graph_config = {
19+
"llm": {
20+
"api_key": gemini_key,
21+
"model": "gemini-pro",
22+
},
23+
"library": "beautifoulsoup"
24+
}
25+
26+
# ************************************************
27+
# Define the list of URLs to scrape
28+
# ************************************************
29+
30+
urls=[
31+
"https://schultzbergagency.com/emil-raste-karlsen/",
32+
"https://schultzbergagency.com/johanna-hedberg/",
33+
]
34+
35+
# ************************************************
36+
# Create the ScriptCreatorMultiGraph instance and run it
37+
# ************************************************
38+
39+
script_creator_graph = ScriptCreatorMultiGraph(
40+
prompt="Find information about actors",
41+
# also accepts a string with the already downloaded HTML code
42+
source=urls,
43+
config=graph_config
44+
)
45+
46+
result = script_creator_graph.run()
47+
print(result)
48+
49+
# ************************************************
50+
# Get graph execution info
51+
# ************************************************
52+
53+
graph_exec_info = script_creator_graph.get_execution_info()
54+
print(prettify_exec_info(graph_exec_info))

0 commit comments

Comments
 (0)