Skip to content

Commit 2991ca8

Browse files
committed
add examples smart scraper lite
1 parent f576afa commit 2991ca8

19 files changed

+588
-0
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
"""
Basic example of a scraping pipeline that runs SmartScraperMultiLiteGraph
on two pages using an Anthropic model.
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
from scrapegraphai.utils import prettify_exec_info

# Load variables from a .env file (if present) before the API key is read.
load_dotenv()

# The LLM settings are built on their own so the graph configuration stays short.
llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings, "verbose": True, "headless": False}

# Every URL listed here is passed to the graph together with the same prompt.
source_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=source_urls,
    config=graph_config,
)

# Run the graph and print its result as indented JSON.
result = smart_scraper_multi_lite_graph.run()
print(json.dumps(result, indent=4))

# Print the execution info returned by the graph, formatted for reading.
graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
"""
Basic example of a scraping pipeline that runs SmartScraperMultiLiteGraph
on two pages using an Azure OpenAI model.
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
from scrapegraphai.utils import prettify_exec_info

# Load variables from a .env file (if present) before the API key is read.
load_dotenv()

# os.environ[...] is used on purpose: a missing key raises KeyError right here.
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {"llm": llm_settings, "verbose": True, "headless": False}

# Every URL listed here is passed to the graph together with the same prompt.
source_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=source_urls,
    config=graph_config,
)

# Run the graph and print its result as indented JSON.
result = smart_scraper_multi_lite_graph.run()
print(json.dumps(result, indent=4))

# Print the execution info returned by the graph, formatted for reading.
graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
"""
Basic example of a scraping pipeline that runs SmartScraperMultiLiteGraph
on two pages using a Bedrock-hosted Anthropic model.
"""
import json
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
from scrapegraphai.utils import prettify_exec_info

# "client_name" looks like a placeholder value -- TODO confirm what the
# library expects for "client" before running this example.
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings}

# Every URL listed here is passed to the graph together with the same prompt.
source_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=source_urls,
    config=graph_config,
)

# Run the graph and print its result as indented JSON.
result = smart_scraper_multi_lite_graph.run()
print(json.dumps(result, indent=4))

# Print the execution info returned by the graph, formatted for reading.
graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
"""
Basic example of a scraping pipeline that runs SmartScraperMultiLiteGraph
on two pages using a DeepSeek model.
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
from scrapegraphai.utils import prettify_exec_info

# Load variables from a .env file (if present) before the API key is read.
load_dotenv()

# The LLM settings are built on their own so the graph configuration stays short.
llm_settings = {
    "api_key": os.getenv("DEEPSEEK_API_KEY"),
    "model": "deepseek/deepseek-coder-33b-instruct",
}
graph_config = {"llm": llm_settings, "verbose": True, "headless": False}

# Every URL listed here is passed to the graph together with the same prompt.
source_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=source_urls,
    config=graph_config,
)

# Run the graph and print its result as indented JSON.
result = smart_scraper_multi_lite_graph.run()
print(json.dumps(result, indent=4))

# Print the execution info returned by the graph, formatted for reading.
graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
"""
Basic example of a scraping pipeline that runs SmartScraperMultiLiteGraph
on two pages using an Ernie model.
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
from scrapegraphai.utils import prettify_exec_info

# Load variables from a .env file (if present) before the API key is read.
load_dotenv()

# The LLM settings are built on their own so the graph configuration stays short.
llm_settings = {
    "api_key": os.getenv("ERNIE_API_KEY"),
    "model": "ernie/ernie-bot-4",
}
graph_config = {"llm": llm_settings, "verbose": True, "headless": False}

# Every URL listed here is passed to the graph together with the same prompt.
source_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=source_urls,
    config=graph_config,
)

# Run the graph and print its result as indented JSON.
result = smart_scraper_multi_lite_graph.run()
print(json.dumps(result, indent=4))

# Print the execution info returned by the graph, formatted for reading.
graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
"""
Basic example of a scraping pipeline that runs SmartScraperMultiLiteGraph
on two pages using a Fireworks model.
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
from scrapegraphai.utils import prettify_exec_info

# Load variables from a .env file (if present) before the API key is read.
load_dotenv()

# The LLM settings are built on their own so the graph configuration stays short.
llm_settings = {
    "api_key": os.getenv("FIREWORKS_API_KEY"),
    "model": "fireworks/llama-v2-70b-chat",
}
graph_config = {"llm": llm_settings, "verbose": True, "headless": False}

# Every URL listed here is passed to the graph together with the same prompt.
source_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=source_urls,
    config=graph_config,
)

# Run the graph and print its result as indented JSON.
result = smart_scraper_multi_lite_graph.run()
print(json.dumps(result, indent=4))

# Print the execution info returned by the graph, formatted for reading.
graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

examples/google_genai/smart_scraper_multi_lite_gemini.py

Whitespace-only changes.
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
"""
Basic example of a scraping pipeline that runs SmartScraperMultiLiteGraph
on two pages using the "gemini-pro" model.
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
from scrapegraphai.utils import prettify_exec_info

# Load variables from a .env file (if present) before the API key is read.
load_dotenv()

# NOTE(review): the model name carries no "provider/" prefix, unlike several
# sibling examples -- confirm the library resolves the provider for it.
llm_settings = {
    "api_key": os.getenv("GOOGLE_API_KEY"),
    "model": "gemini-pro",
}
graph_config = {"llm": llm_settings, "verbose": True, "headless": False}

# Every URL listed here is passed to the graph together with the same prompt.
source_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=source_urls,
    config=graph_config,
)

# Run the graph and print its result as indented JSON.
result = smart_scraper_multi_lite_graph.run()
print(json.dumps(result, indent=4))

# Print the execution info returned by the graph, formatted for reading.
graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
"""
Basic example of a scraping pipeline that runs SmartScraperMultiLiteGraph
on two pages using the "text-bison@001" model in a Google Cloud project.
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
from scrapegraphai.utils import prettify_exec_info

# Load variables from a .env file (if present) before the project id is read.
load_dotenv()

# This example is configured with a project and location instead of an API key.
llm_settings = {
    "project": os.getenv("GOOGLE_CLOUD_PROJECT"),
    "location": "us-central1",
    "model": "text-bison@001",
}
graph_config = {"llm": llm_settings, "verbose": True, "headless": False}

# Every URL listed here is passed to the graph together with the same prompt.
source_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=source_urls,
    config=graph_config,
)

# Run the graph and print its result as indented JSON.
result = smart_scraper_multi_lite_graph.run()
print(json.dumps(result, indent=4))

# Print the execution info returned by the graph, formatted for reading.
graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

examples/google_vertexai/smart_scraper_multi_lite_vertex.py

Whitespace-only changes.
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
"""
Basic example of a scraping pipeline that runs SmartScraperMultiLiteGraph
on two pages using the "mixtral-8x7b-32768" model with a Groq API key.
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
from scrapegraphai.utils import prettify_exec_info

# Load variables from a .env file (if present) before the API key is read.
load_dotenv()

# NOTE(review): the model name carries no "provider/" prefix, unlike several
# sibling examples -- confirm the library resolves the provider for it.
llm_settings = {
    "api_key": os.getenv("GROQ_API_KEY"),
    "model": "mixtral-8x7b-32768",
}
graph_config = {"llm": llm_settings, "verbose": True, "headless": False}

# Every URL listed here is passed to the graph together with the same prompt.
source_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=source_urls,
    config=graph_config,
)

# Run the graph and print its result as indented JSON.
result = smart_scraper_multi_lite_graph.run()
print(json.dumps(result, indent=4))

# Print the execution info returned by the graph, formatted for reading.
graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
"""
Basic example of a scraping pipeline that runs SmartScraperMultiLiteGraph
on two pages using a Llama 2 chat model with a Hugging Face Hub token.
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
from scrapegraphai.utils import prettify_exec_info

# Load variables from a .env file (if present) before the API token is read.
load_dotenv()

# The LLM settings are built on their own so the graph configuration stays short.
llm_settings = {
    "api_key": os.getenv("HUGGINGFACEHUB_API_TOKEN"),
    "model": "huggingfacehub/meta-llama/Llama-2-70b-chat-hf",
}
graph_config = {"llm": llm_settings, "verbose": True, "headless": False}

# Every URL listed here is passed to the graph together with the same prompt.
source_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=source_urls,
    config=graph_config,
)

# Run the graph and print its result as indented JSON.
result = smart_scraper_multi_lite_graph.run()
print(json.dumps(result, indent=4))

# Print the execution info returned by the graph, formatted for reading.
graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

examples/huggingfacehub/smart_scraper_multi_lite_uhggingfacehub.py

Whitespace-only changes.
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
"""
Basic example of a scraping pipeline that runs SmartScraperMultiLiteGraph
on two pages using a model served by Ollama.
"""
import json
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
from scrapegraphai.utils import prettify_exec_info

# ************************************************
# Graph configuration
# ************************************************

llm_settings = {
    "model": "ollama/llama3.1",
    "temperature": 0,
    # Ollama requires the output format to be stated explicitly.
    "format": "json",
    # Address of the Ollama server; point it wherever your instance runs.
    "base_url": "http://localhost:11434",
}
graph_config = {"llm": llm_settings, "verbose": True, "headless": False}

# ************************************************
# Build the SmartScraperMultiLiteGraph instance and run it
# ************************************************

# Every URL listed here is passed to the graph together with the same prompt.
source_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=source_urls,
    config=graph_config,
)

result = smart_scraper_multi_lite_graph.run()
print(json.dumps(result, indent=4))

# ************************************************
# Print the graph execution info
# ************************************************

graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

0 commit comments

Comments
 (0)