Skip to content

Commit 720f187

Browse files
committed
Merge branch 'fireworks_integration' into support
2 parents 2ab7ddc + 4b56604 commit 720f187

32 files changed

+1799
-182
lines changed

examples/fireworks/.env.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
FIREWORKS_APIKEY="your fireworks api key"
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
"""
2+
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
import pandas as pd
8+
from scrapegraphai.graphs import CSVScraperGraph
9+
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
10+
load_dotenv()
11+
12+
# ************************************************
13+
# Read the CSV file
14+
# ************************************************
15+
16+
FILE_NAME = "inputs/username.csv"
17+
curr_dir = os.path.dirname(os.path.realpath(__file__))
18+
file_path = os.path.join(curr_dir, FILE_NAME)
19+
20+
text = pd.read_csv(file_path)
21+
22+
# ************************************************
23+
# Define the configuration for the graph
24+
# ************************************************
25+
fireworks_api_key = os.getenv("FIREWORKS_APIKEY")
26+
27+
graph_config = {
28+
"llm": {
29+
"api_key": fireworks_api_key,
30+
"model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct"
31+
},
32+
"embeddings": {
33+
"model": "ollama/nomic-embed-text",
34+
"temperature": 0,
35+
# "base_url": "http://localhost:11434", # optional: override the Ollama server URL
36+
},
37+
"verbose": True,
38+
"headless": False,
39+
}
40+
41+
# ************************************************
42+
# Create the CSVScraperGraph instance and run it
43+
# ************************************************
44+
45+
csv_scraper_graph = CSVScraperGraph(
46+
prompt="List me all the last names",
47+
source=str(text), # Pass the content of the file, not the file object
48+
config=graph_config
49+
)
50+
51+
result = csv_scraper_graph.run()
52+
print(result)
53+
54+
# ************************************************
55+
# Get graph execution info
56+
# ************************************************
57+
58+
graph_exec_info = csv_scraper_graph.get_execution_info()
59+
print(prettify_exec_info(graph_exec_info))
60+
61+
# Save to both csv and json
62+
convert_to_csv(result, "result")
63+
convert_to_json(result, "result")
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
"""
2+
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
import pandas as pd
8+
from scrapegraphai.graphs import CSVScraperMultiGraph
9+
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
10+
11+
load_dotenv()
12+
# ************************************************
13+
# Read the CSV file
14+
# ************************************************
15+
16+
FILE_NAME = "inputs/username.csv"
17+
curr_dir = os.path.dirname(os.path.realpath(__file__))
18+
file_path = os.path.join(curr_dir, FILE_NAME)
19+
20+
text = pd.read_csv(file_path)
21+
22+
# ************************************************
23+
# Define the configuration for the graph
24+
# ************************************************
25+
fireworks_api_key = os.getenv("FIREWORKS_APIKEY")
26+
27+
graph_config = {
28+
"llm": {
29+
"api_key": fireworks_api_key,
30+
"model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct"
31+
},
32+
"embeddings": {
33+
"model": "ollama/nomic-embed-text",
34+
"temperature": 0,
35+
# "base_url": "http://localhost:11434", # optional: override the Ollama server URL
36+
},
37+
"verbose": True,
38+
"headless": False,
39+
}
40+
41+
# ************************************************
42+
# Create the CSVScraperMultiGraph instance and run it
43+
# ************************************************
44+
45+
csv_scraper_graph = CSVScraperMultiGraph(
46+
prompt="List me all the last names",
47+
source=[str(text), str(text)],
48+
config=graph_config
49+
)
50+
51+
result = csv_scraper_graph.run()
52+
print(result)
53+
54+
# ************************************************
55+
# Get graph execution info
56+
# ************************************************
57+
58+
graph_exec_info = csv_scraper_graph.get_execution_info()
59+
print(prettify_exec_info(graph_exec_info))
60+
61+
# Save to both csv and json
62+
convert_to_csv(result, "result")
63+
convert_to_json(result, "result")
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
"""
2+
Example of custom graph using existing nodes
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
8+
from langchain_openai import OpenAIEmbeddings
9+
from scrapegraphai.models import OpenAI
10+
from scrapegraphai.graphs import BaseGraph
11+
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
12+
load_dotenv()
13+
14+
# ************************************************
15+
# Define the configuration for the graph
16+
# ************************************************
17+
18+
fireworks_api_key = os.getenv("FIREWORKS_APIKEY")
19+
20+
graph_config = {
21+
"llm": {
22+
"api_key": fireworks_api_key,
23+
"model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct"
24+
},
25+
"embeddings": {
26+
"model": "ollama/nomic-embed-text",
27+
"temperature": 0,
28+
# "base_url": "http://localhost:11434", # optional: override the Ollama server URL
29+
},
30+
"verbose": True,
31+
"headless": False,
32+
}
33+
34+
# ************************************************
35+
# Define the graph nodes
36+
# ************************************************
37+
38+
llm_model = OpenAI(graph_config["llm"])
39+
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)
40+
41+
# define the nodes for the graph
42+
robot_node = RobotsNode(
43+
input="url",
44+
output=["is_scrapable"],
45+
node_config={
46+
"llm_model": llm_model,
47+
"force_scraping": True,
48+
"verbose": True,
49+
}
50+
)
51+
52+
fetch_node = FetchNode(
53+
input="url | local_dir",
54+
output=["doc", "link_urls", "img_urls"],
55+
node_config={
56+
"verbose": True,
57+
"headless": True,
58+
}
59+
)
60+
parse_node = ParseNode(
61+
input="doc",
62+
output=["parsed_doc"],
63+
node_config={
64+
"chunk_size": 4096,
65+
"verbose": True,
66+
}
67+
)
68+
rag_node = RAGNode(
69+
input="user_prompt & (parsed_doc | doc)",
70+
output=["relevant_chunks"],
71+
node_config={
72+
"llm_model": llm_model,
73+
"embedder_model": embedder,
74+
"verbose": True,
75+
}
76+
)
77+
generate_answer_node = GenerateAnswerNode(
78+
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
79+
output=["answer"],
80+
node_config={
81+
"llm_model": llm_model,
82+
"verbose": True,
83+
}
84+
)
85+
86+
# ************************************************
87+
# Create the graph by defining the connections
88+
# ************************************************
89+
90+
graph = BaseGraph(
91+
nodes=[
92+
robot_node,
93+
fetch_node,
94+
parse_node,
95+
rag_node,
96+
generate_answer_node,
97+
],
98+
edges=[
99+
(robot_node, fetch_node),
100+
(fetch_node, parse_node),
101+
(parse_node, rag_node),
102+
(rag_node, generate_answer_node)
103+
],
104+
entry_point=robot_node
105+
)
106+
107+
# ************************************************
108+
# Execute the graph
109+
# ************************************************
110+
111+
result, execution_info = graph.execute({
112+
"user_prompt": "Describe the content",
113+
"url": "https://example.com/"
114+
})
115+
116+
# get the answer from the result
117+
result = result.get("answer", "No answer found.")
118+
print(result)
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
"""
2+
Basic example of scraping pipeline using DeepScraperGraph
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
from scrapegraphai.graphs import DeepScraperGraph
8+
from scrapegraphai.utils import prettify_exec_info
9+
10+
load_dotenv()
11+
12+
# ************************************************
13+
# Define the configuration for the graph
14+
# ************************************************
15+
16+
fireworks_api_key = os.getenv("FIREWORKS_APIKEY")
17+
18+
graph_config = {
19+
"llm": {
20+
"api_key": fireworks_api_key,
21+
"model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct"
22+
},
23+
"embeddings": {
24+
"model": "ollama/nomic-embed-text",
25+
"temperature": 0,
26+
# "base_url": "http://localhost:11434", # optional: override the Ollama server URL
27+
},
28+
"verbose": True,
29+
"max_depth": 1
30+
}
31+
32+
# ************************************************
33+
# Create the DeepScraperGraph instance and run it
34+
# ************************************************
35+
36+
deep_scraper_graph = DeepScraperGraph(
37+
prompt="List me all the job titles and detailed job description.",
38+
# also accepts a string with the already downloaded HTML code
39+
source="https://www.google.com/about/careers/applications/jobs/results/?location=Bangalore%20India",
40+
config=graph_config
41+
)
42+
43+
result = deep_scraper_graph.run()
44+
print(result)
45+
46+
# ************************************************
47+
# Get graph execution info
48+
# ************************************************
49+
50+
graph_exec_info = deep_scraper_graph.get_execution_info()
51+
print(deep_scraper_graph.get_state("relevant_links"))
52+
print(prettify_exec_info(graph_exec_info))

0 commit comments

Comments
 (0)