Skip to content

Commit bde0249

Browse files
committed
add examples
1 parent fe8083f commit bde0249

File tree

2 files changed

+95
-0
lines changed

2 files changed

+95
-0
lines changed

examples/gemini/xml_scraper_gemini.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
"""
Example scraping pipeline: extract book information from an XML document
with XMLScraperGraph, using a Gemini model as the LLM.
"""

import os

from dotenv import load_dotenv

from scrapegraphai.graphs import XMLScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# Load environment settings before the API key is looked up below.
load_dotenv()

# ------------------------------------------------
# Load the XML document stored relative to this script
# ------------------------------------------------

FILE_NAME = "inputs/books.xml"
script_dir = os.path.dirname(os.path.realpath(__file__))

with open(os.path.join(script_dir, FILE_NAME), 'r', encoding="utf-8") as xml_file:
    text = xml_file.read()

# ------------------------------------------------
# Build the graph configuration
# ------------------------------------------------

# The key is read from the GOOGLE_APIKEY environment variable.
llm_settings = {
    "api_key": os.getenv("GOOGLE_APIKEY"),
    "model": "gemini-pro",
}
graph_config = {"llm": llm_settings}

# ------------------------------------------------
# Instantiate XMLScraperGraph and execute it
# ------------------------------------------------

# `source` receives the XML text itself rather than a file handle.
xml_scraper_graph = XMLScraperGraph(
    prompt="List me all the authors, title and genres of the books",
    source=text,
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# ------------------------------------------------
# Report how the graph execution went
# ------------------------------------------------

print(prettify_exec_info(xml_scraper_graph.get_execution_info()))

# Export the result as both CSV and JSON
convert_to_csv(result, "result")
convert_to_json(result, "result")
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
"""
Example scraping pipeline: run SmartScraperGraph against a model
served through a OneAPI endpoint.
"""
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

# ------------------------------------------------
# Build the graph configuration
# ------------------------------------------------

llm_settings = {
    "api_key": "***************************",
    "model": "oneapi/qwen-turbo",
    "base_url": "http://127.0.0.1:3000/v1",  # set the OneAPI URL
}
graph_config = {"llm": llm_settings}

# ------------------------------------------------
# Instantiate SmartScraperGraph and execute it
# ------------------------------------------------

# `source` may also be a string holding already-downloaded HTML code.
smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the titles",
    source="https://www.wired.com/",
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ------------------------------------------------
# Report how the graph execution went
# ------------------------------------------------

print(prettify_exec_info(smart_scraper_graph.get_execution_info()))

0 commit comments

Comments
 (0)