Commit 004d03a: add examples
1 parent f4a253b

18 files changed, +505 -17 lines changed

examples/bedrock/csv_scraper_bedrock.py

Lines changed: 1 addition & 1 deletion
@@ -30,14 +30,14 @@
 
 graph_config = {
     "llm": {
+        "client": "client_name",
         "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
         "temperature": 0.0
     },
     "embeddings": {
         "model": "bedrock/cohere.embed-multilingual-v3"
     }
 }
-
 # ************************************************
 # Create the CSVScraperGraph instance and run it
 # ************************************************
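
Every Bedrock config touched by this commit sets the new "client" entry to the literal placeholder "client_name". A minimal sketch of what that slot is presumably meant to hold, assuming the examples expect a boto3 "bedrock-runtime" client (the region and credential setup below are illustrative, not part of the commit):

import boto3

# Hypothetical illustration: build a Bedrock runtime client and pass it in
# place of the "client_name" placeholder used throughout these examples.
client = boto3.client("bedrock-runtime", region_name="us-west-2")

graph_config = {
    "llm": {
        "client": client,
        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
        "temperature": 0.0
    },
    "embeddings": {
        "model": "bedrock/cohere.embed-multilingual-v3"
    }
}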

examples/bedrock/custom_graph_bedrock.py

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@
 
 graph_config = {
     "llm": {
+        "client": "client_name",
         "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
         "temperature": 0.0
     },

examples/bedrock/json_scraper_bedrock.py

Lines changed: 1 addition & 0 deletions
@@ -29,6 +29,7 @@
 
 graph_config = {
     "llm": {
+        "client": "client_name",
         "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
         "temperature": 0.0
     },
Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
"""
Basic example of scraping pipeline using SmartScraper
"""

import os, json
from dotenv import load_dotenv
from scrapegraphai.utils import prettify_exec_info
from scrapegraphai.graphs import PDFScraperGraph
load_dotenv()


# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "client": "client_name",
        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
        "temperature": 0.0
    },
    "embeddings": {
        "model": "bedrock/cohere.embed-multilingual-v3"
    }
}

source = """
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
circa 1308/21 by Dante. It is usually held to be one of the world's great works of literature.
Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
"""

schema = """
{
    "type": "object",
    "properties": {
        "summary": {
            "type": "string"
        },
        "topics": {
            "type": "array",
            "items": {
                "type": "string"
            }
        }
    }
}
"""

pdf_scraper_graph = PDFScraperGraph(
    prompt="Summarize the text and find the main topics",
    source=source,
    config=graph_config,
    schema=schema,
)
result = pdf_scraper_graph.run()

print(json.dumps(result, indent=4))

examples/bedrock/scrape_plain_text_bedrock.py

Lines changed: 1 addition & 0 deletions
@@ -30,6 +30,7 @@
 
 graph_config = {
     "llm": {
+        "client": "client_name",
         "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
         "temperature": 0.0
     },

examples/bedrock/script_generator_bedrock.py

Lines changed: 2 additions & 1 deletion
@@ -15,13 +15,14 @@
 
 graph_config = {
     "llm": {
+        "client": "client_name",
         "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
         "temperature": 0.0
     },
     "embeddings": {
         "model": "bedrock/cohere.embed-multilingual-v3"
     },
-        "library": "beautifulsoup"
+    "library": "beautifulsoup"
 }
 
 # ************************************************

examples/bedrock/search_graph_bedrock.py

Lines changed: 2 additions & 2 deletions
@@ -14,14 +14,14 @@
 
 graph_config = {
     "llm": {
+        "client": "client_name",
         "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
         "temperature": 0.0
     },
     "embeddings": {
-        "model": "bedrock/amazon.titan-embed-text-v2:0"
+        "model": "bedrock/cohere.embed-multilingual-v3"
     }
 }
-
 # ************************************************
 # Create the SearchGraph instance and run it
 # ************************************************

examples/bedrock/smart_scraper_bedrock.py

Lines changed: 6 additions & 6 deletions
@@ -14,15 +14,15 @@
 # Define the configuration for the graph
 # ************************************************
 
-openai_key = os.getenv("OPENAI_APIKEY")
-
 graph_config = {
     "llm": {
-        "api_key": openai_key,
-        "model": "gpt-4o",
+        "client": "client_name",
+        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+        "temperature": 0.0
     },
-    "verbose": True,
-    "headless": False,
+    "embeddings": {
+        "model": "bedrock/cohere.embed-multilingual-v3"
+    }
 }
 
 # ************************************************
Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
"""
Basic example of scraping pipeline using SmartScraper
"""

import os, json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiGraph

load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "client": "client_name",
        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
        "temperature": 0.0
    },
    "embeddings": {
        "model": "bedrock/cohere.embed-multilingual-v3"
    }
}

# *******************************************************
# Create the SmartScraperMultiGraph instance and run it
# *******************************************************

multiple_search_graph = SmartScraperMultiGraph(
    prompt="Who is Marco Perini?",
    source=[
        "https://perinim.github.io/",
        "https://perinim.github.io/cv/"
    ],
    schema=None,
    config=graph_config
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))
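
A possible follow-up for this multi-source example, assuming SmartScraperMultiGraph exposes the same get_execution_info() helper that the other ScrapeGraphAI examples pair with prettify_exec_info (this part is not shown in the commit), would be:

from scrapegraphai.utils import prettify_exec_info

# Hypothetical: inspect timing and token statistics after the run
graph_exec_info = multiple_search_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))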

examples/bedrock/smart_scraper_schema_bedrock.py

Lines changed: 6 additions & 6 deletions
@@ -33,15 +33,15 @@
 # Define the configuration for the graph
 # ************************************************
 
-openai_key = os.getenv("OPENAI_APIKEY")
-
 graph_config = {
     "llm": {
-        "api_key": openai_key,
-        "model": "gpt-4o",
+        "client": "client_name",
+        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+        "temperature": 0.0
     },
-    "verbose": True,
-    "headless": False,
+    "embeddings": {
+        "model": "bedrock/cohere.embed-multilingual-v3"
+    }
 }
 
 # ************************************************

examples/bedrock/xml_scraper_bedrock.py

Lines changed: 1 addition & 1 deletion
@@ -28,6 +28,7 @@
 
 graph_config = {
     "llm": {
+        "client": "client_name",
         "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
         "temperature": 0.0
     },
@@ -59,4 +60,3 @@
 # Save to json or csv
 convert_to_csv(result, "result")
 convert_to_json(result, "result")
-
Lines changed: 84 additions & 0 deletions
@@ -0,0 +1,84 @@
"""
Example of custom graph using Gemini Google model
"""

import os
from dotenv import load_dotenv
from scrapegraphai.models import Gemini
from scrapegraphai.graphs import BaseGraph
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

deepseek_key = os.getenv("DEEPSEEK_APIKEY")

graph_config = {
    "llm": {
        "model": "deepseek-chat",
        "openai_api_key": deepseek_key,
        "openai_api_base": 'https://api.deepseek.com/v1',
    },
    "verbose": True,
}

# ************************************************
# Define the graph nodes
# ************************************************

llm_model = Gemini(graph_config["llm"])

# define the nodes for the graph
fetch_node = FetchNode(
    input="url | local_dir",
    output=["doc"],
)
parse_node = ParseNode(
    input="doc",
    output=["parsed_doc"],
    node_config={"chunk_size": 4096}
)
rag_node = RAGNode(
    input="user_prompt & (parsed_doc | doc)",
    output=["relevant_chunks"],
    node_config={"llm": llm_model},
)
generate_answer_node = GenerateAnswerNode(
    input="user_prompt & (relevant_chunks | parsed_doc | doc)",
    output=["answer"],
    node_config={"llm": llm_model},
)

# ************************************************
# Create the graph by defining the connections
# ************************************************

graph = BaseGraph(
    nodes={
        fetch_node,
        parse_node,
        rag_node,
        generate_answer_node,
    },
    edges={
        (fetch_node, parse_node),
        (parse_node, rag_node),
        (rag_node, generate_answer_node)
    },
    entry_point=fetch_node
)

# ************************************************
# Execute the graph
# ************************************************

result, execution_info = graph.execute({
    "user_prompt": "List me the projects with their description",
    "url": "https://perinim.github.io/projects/"
})

# get the answer from the result
result = result.get("answer", "No answer found.")
print(result)

Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
"""
Basic example of scraping pipeline using SmartScraper
"""

import os, json
from dotenv import load_dotenv
from scrapegraphai.utils import prettify_exec_info
from scrapegraphai.graphs import PDFScraperGraph
load_dotenv()


# ************************************************
# Define the configuration for the graph
# ************************************************

deepseek_key = os.getenv("DEEPSEEK_APIKEY")

graph_config = {
    "llm": {
        "model": "deepseek-chat",
        "openai_api_key": deepseek_key,
        "openai_api_base": 'https://api.deepseek.com/v1',
    },
    "verbose": True,
}

source = """
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
circa 1308/21 by Dante. It is usually held to be one of the world's great works of literature.
Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
"""

schema = """
{
    "type": "object",
    "properties": {
        "summary": {
            "type": "string"
        },
        "topics": {
            "type": "array",
            "items": {
                "type": "string"
            }
        }
    }
}
"""

pdf_scraper_graph = PDFScraperGraph(
    prompt="Summarize the text and find the main topics",
    source=source,
    config=graph_config,
    schema=schema,
)
result = pdf_scraper_graph.run()

print(json.dumps(result, indent=4))
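
Both DeepSeek examples resolve the API key through python-dotenv rather than hard-coding it. A minimal sketch of the assumed setup (the key value and the .env location are placeholders, not part of the commit):

# .env file placed next to the script, e.g.:
#   DEEPSEEK_APIKEY=<your-deepseek-key>

from dotenv import load_dotenv
import os

load_dotenv()                                  # reads .env into the process environment
deepseek_key = os.getenv("DEEPSEEK_APIKEY")    # same lookup used in the examples above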
