Skip to content

Commit 743dfe1

Browse files
committed
add all possible examples
1 parent b408655 commit 743dfe1

16 files changed

+748
-3
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
"""
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
(Anthropic model).
"""

import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# Called before any os.getenv() lookup below so values may come from a .env file.
load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# Render the whole table as text. str(DataFrame) goes through pandas' display
# options and silently elides rows/columns/long cells on larger inputs, which
# would hide data from the model; to_string() renders everything.
csv_text = text.to_string()

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
        "model": "claude-3-haiku-20240307",
        "max_tokens": 4000,
    },
}

# ************************************************
# Create the CSVScraperMultiGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    # The same document is passed twice to exercise the multi-source graph.
    source=[csv_text, csv_text],
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
"""
Example: run an XMLScraperMultiGraph pipeline over XML documents (Anthropic model).
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import XMLScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# Called before the os.getenv() lookup below so the key may come from a .env file.
load_dotenv()

# --- Load the XML document ---------------------------------------------

FILE_NAME = "inputs/books.xml"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

with open(file_path, "r", encoding="utf-8") as xml_file:
    text = xml_file.read()

# --- Graph configuration -----------------------------------------------

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "claude-3-haiku-20240307",
    "max_tokens": 4000,
}
graph_config = {"llm": llm_settings}

# --- Build and run the XMLScraperMultiGraph ----------------------------

xml_scraper_graph = XMLScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    # Sources are the file *contents* (the same document twice), not file objects.
    source=[text] * 2,
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# --- Execution info ----------------------------------------------------

graph_exec_info = xml_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# --- Persist the result: CSV first, then JSON --------------------------

for export in (convert_to_csv, convert_to_json):
    export(result, "result")
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
"""
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
(Bedrock-hosted models).
"""

import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# Render the whole table as text. str(DataFrame) goes through pandas' display
# options and silently elides rows/columns/long cells on larger inputs, which
# would hide data from the model; to_string() renders everything.
csv_text = text.to_string()

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        # NOTE(review): "client_name" looks like a placeholder value — confirm
        # what the library expects for "client" before running this example.
        "client": "client_name",
        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
        "temperature": 0.0,
    },
    "embeddings": {
        "model": "bedrock/cohere.embed-multilingual-v3",
    },
}

# ************************************************
# Create the CSVScraperMultiGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    # The same document is passed twice to exercise the multi-source graph.
    source=[csv_text, csv_text],
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
"""
Example: run an XMLScraperMultiGraph pipeline over XML documents (Bedrock-hosted models).
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import XMLScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# --- Load the XML document ---------------------------------------------

FILE_NAME = "inputs/books.xml"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

with open(file_path, "r", encoding="utf-8") as xml_file:
    text = xml_file.read()

# --- Graph configuration -----------------------------------------------

llm_settings = {
    # NOTE(review): "client_name" looks like a placeholder value — confirm
    # what the library expects for "client" before running this example.
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
embedding_settings = {
    "model": "bedrock/cohere.embed-multilingual-v3",
}
graph_config = {
    "llm": llm_settings,
    "embeddings": embedding_settings,
}

# --- Build and run the XMLScraperMultiGraph ----------------------------

xml_scraper_graph = XMLScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    # Sources are the file *contents* (the same document twice), not file objects.
    source=[text] * 2,
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# --- Execution info ----------------------------------------------------

graph_exec_info = xml_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# --- Persist the result: CSV first, then JSON --------------------------

for export in (convert_to_csv, convert_to_json):
    export(result, "result")
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
"""
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
(DeepSeek model via an OpenAI-style endpoint configuration).
"""

import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# Called before any os.getenv() lookup below so values may come from a .env file.
load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# Render the whole table as text. str(DataFrame) goes through pandas' display
# options and silently elides rows/columns/long cells on larger inputs, which
# would hide data from the model; to_string() renders everything.
csv_text = text.to_string()

# ************************************************
# Define the configuration for the graph
# ************************************************

deepseek_key = os.getenv("DEEPSEEK_APIKEY")

graph_config = {
    "llm": {
        "model": "deepseek-chat",
        "openai_api_key": deepseek_key,
        "openai_api_base": 'https://api.deepseek.com/v1',
    },
    "verbose": True,
}

# ************************************************
# Create the CSVScraperMultiGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    # The same document is passed twice to exercise the multi-source graph.
    source=[csv_text, csv_text],
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
"""
Example: run an XMLScraperMultiGraph pipeline over XML documents (DeepSeek model).
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import XMLScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# Called before the os.getenv() lookup below so the key may come from a .env file.
load_dotenv()

# --- Load the XML document ---------------------------------------------

FILE_NAME = "inputs/books.xml"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

with open(file_path, "r", encoding="utf-8") as xml_file:
    text = xml_file.read()

# --- Graph configuration -----------------------------------------------

deepseek_key = os.getenv("DEEPSEEK_APIKEY")

llm_settings = {
    "model": "deepseek-chat",
    "openai_api_key": deepseek_key,
    "openai_api_base": 'https://api.deepseek.com/v1',
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
}

# --- Build and run the XMLScraperMultiGraph ----------------------------

xml_scraper_graph = XMLScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    # Sources are the file *contents* (the same document twice), not file objects.
    source=[text] * 2,
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# --- Execution info ----------------------------------------------------

graph_exec_info = xml_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# --- Persist the result: CSV first, then JSON --------------------------

for export in (convert_to_csv, convert_to_json):
    export(result, "result")

0 commit comments

Comments
 (0)