Commit ecd98b2

add schema example
1 parent 8296236 commit ecd98b2

7 files changed, +407 -1 lines changed
Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
"""
Basic example of scraping pipeline using SmartScraper with Anthropic and HuggingFace Inference API embeddings
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings


# required environment variables in .env
# HUGGINGFACEHUB_API_TOKEN
# ANTHROPIC_API_KEY
load_dotenv()

HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')

# ************************************************
# Initialize the model instances
# ************************************************

embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
    api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
)

# ************************************************
# Define the output schema for the graph
# ************************************************

schema = """
{
    "Projects": [
        "Project #":
        {
            "title": "...",
            "description": "...",
        },
        "Project #":
        {
            "title": "...",
            "description": "...",
        }
    ]
}
"""

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

graph_config = {
    "llm": {
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
        "model": "claude-3-haiku-20240307",
        "max_tokens": 4000
    },
    "embeddings": {"model_instance": embedder_model_instance}
}

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description",
    # also accepts a string with the already downloaded HTML code
    schema=schema,
    source="https://perinim.github.io/projects/",
    config=graph_config
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
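
The comments in the example above say both keys are read from a local .env file via load_dotenv(). A minimal sketch of that file, with placeholder values (the values shown are assumptions, not real keys):

HUGGINGFACEHUB_API_TOKEN=hf_your_token_here
ANTHROPIC_API_KEY=sk-ant-your_key_here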
Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
"""
Basic example of scraping pipeline using SmartScraper
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ************************************************
# Define the output schema for the graph
# ************************************************

schema = """
{
    "Projects": [
        "Project #":
        {
            "title": "...",
            "description": "...",
        },
        "Project #":
        {
            "title": "...",
            "description": "...",
        }
    ]
}
"""

# ************************************************
# Define the configuration for the graph
# ************************************************

openai_key = os.getenv("OPENAI_APIKEY")

graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "gpt-4o",
    },
    "verbose": True,
    "headless": False,
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description",
    # also accepts a string with the already downloaded HTML code
    source="https://perinim.github.io/projects/",
    schema=schema,
    config=graph_config
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
"""
Basic example of scraping pipeline using SmartScraper
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ************************************************
# Define the output schema for the graph
# ************************************************

schema = """
{
    "Projects": [
        "Project #":
        {
            "title": "...",
            "description": "...",
        },
        "Project #":
        {
            "title": "...",
            "description": "...",
        }
    ]
}
"""

# ************************************************
# Define the configuration for the graph
# ************************************************

deepseek_key = os.getenv("DEEPSEEK_APIKEY")

graph_config = {
    "llm": {
        "model": "deepseek-chat",
        "openai_api_key": deepseek_key,
        "openai_api_base": 'https://api.deepseek.com/v1',
    },
    "verbose": True,
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description.",
    # also accepts a string with the already downloaded HTML code
    source="https://perinim.github.io/projects/",
    schema=schema,
    config=graph_config
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
"""
Basic example of scraping pipeline using SmartScraper with schema
"""

import os
from dotenv import load_dotenv
from scrapegraphai.utils import prettify_exec_info
from scrapegraphai.graphs import SmartScraperGraph

load_dotenv()

# ************************************************
# Define the output schema for the graph
# ************************************************

schema = """
{
    "Projects": [
        "Project #":
        {
            "title": "...",
            "description": "...",
        },
        "Project #":
        {
            "title": "...",
            "description": "...",
        }
    ]
}
"""

# ************************************************
# Define the configuration for the graph
# ************************************************

gemini_key = os.getenv("GOOGLE_APIKEY")

graph_config = {
    "llm": {
        "api_key": gemini_key,
        "model": "gemini-pro",
    },
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the news with their description.",
    # also accepts a string with the already downloaded HTML code
    source="https://www.wired.com",
    schema=schema,
    config=graph_config
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Lines changed: 75 additions & 0 deletions
@@ -0,0 +1,75 @@
"""
Basic example of scraping pipeline using SmartScraper with schema
"""

import os, json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ************************************************
# Define the output schema for the graph
# ************************************************

schema = """
{
    "Projects": [
        "Project #":
        {
            "title": "...",
            "description": "...",
        },
        "Project #":
        {
            "title": "...",
            "description": "...",
        }
    ]
}
"""

# ************************************************
# Define the configuration for the graph
# ************************************************

groq_key = os.getenv("GROQ_APIKEY")
openai_key = os.getenv("OPENAI_APIKEY")

graph_config = {
    "llm": {
        "model": "groq/gemma-7b-it",
        "api_key": groq_key,
        "temperature": 0
    },
    "embeddings": {
        "api_key": openai_key,
        "model": "openai",
    },
    "headless": False
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description.",
    # also accepts a string with the already downloaded HTML code
    source="https://perinim.github.io/projects/",
    schema=schema,
    config=graph_config
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
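
Each of these examples only prints the dict returned by run(). A small optional follow-up sketch, not part of the committed files, that persists it to disk (the file name projects.json is just an illustration):

# Hypothetical follow-up: save the scraped result returned by run() to a JSON file.
import json

with open("projects.json", "w", encoding="utf-8") as f:
    json.dump(result, f, indent=2, ensure_ascii=False)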
