
Commit 84fcb44

feat: fixed custom_graphs example and robots_node
1 parent 8c5397f commit 84fcb44

File tree

5 files changed (+139, -15 lines)

examples/openai/custom_graph_openai.py

Lines changed: 25 additions & 6 deletions
@@ -4,6 +4,8 @@
 
 import os
 from dotenv import load_dotenv
+
+from langchain_openai import OpenAIEmbeddings
 from scrapegraphai.models import OpenAI
 from scrapegraphai.graphs import BaseGraph
 from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
@@ -20,7 +22,7 @@
         "api_key": openai_key,
         "model": "gpt-3.5-turbo",
         "temperature": 0,
-        "streaming": True
+        "streaming": False
     },
 }
 
@@ -29,33 +31,50 @@
 # ************************************************
 
 llm_model = OpenAI(graph_config["llm"])
+embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)
 
 # define the nodes for the graph
 robot_node = RobotsNode(
     input="url",
     output=["is_scrapable"],
-    node_config={"llm_model": llm_model}
+    node_config={
+        "llm_model": llm_model,
+        "verbose": True,
+    }
 )
 
 fetch_node = FetchNode(
     input="url | local_dir",
     output=["doc"],
-    node_config={"headless": True, "verbose": True}
+    node_config={
+        "verbose": True,
+        "headless": True,
+    }
 )
 parse_node = ParseNode(
     input="doc",
     output=["parsed_doc"],
-    node_config={"chunk_size": 4096}
+    node_config={
+        "chunk_size": 4096,
+        "verbose": True,
+    }
 )
 rag_node = RAGNode(
     input="user_prompt & (parsed_doc | doc)",
     output=["relevant_chunks"],
-    node_config={"llm_model": llm_model},
+    node_config={
+        "llm_model": llm_model,
+        "embedder_model": embedder,
+        "verbose": True,
+    }
 )
 generate_answer_node = GenerateAnswerNode(
     input="user_prompt & (relevant_chunks | parsed_doc | doc)",
     output=["answer"],
-    node_config={"llm_model": llm_model},
+    node_config={
+        "llm_model": llm_model,
+        "verbose": True,
+    }
 )
 
 # ************************************************
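The hunk above only touches the node definitions; the rest of the example (outside this diff) assembles the same nodes into a BaseGraph and runs it. A minimal sketch of that wiring, continuing from the definitions above and assuming the edges follow the same order as the new example further below:

# Sketch only: the exact edges and entry point live in the part of the example
# not shown in this hunk.
graph = BaseGraph(
    nodes=[robot_node, fetch_node, parse_node, rag_node, generate_answer_node],
    edges=[
        (robot_node, fetch_node),
        (fetch_node, parse_node),
        (parse_node, rag_node),
        (rag_node, generate_answer_node),
    ],
    entry_point=robot_node,
)

result, execution_info = graph.execute({
    "user_prompt": "Describe the content",   # hypothetical prompt
    "url": "https://example.com",            # hypothetical source URL
})
print(result.get("answer", "No answer found."))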

examples/openai/smart_scraper_openai.py

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
         "api_key": openai_key,
         "model": "gpt-3.5-turbo",
     },
-    "verbose": True,
+    "verbose": False,
 }
 
 # ************************************************
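With the example config now setting verbose to False, turning the logging back on is a per-run choice. A minimal sketch, continuing from the graph_config above and assuming the rest of smart_scraper_openai.py builds a SmartScraperGraph from it (not shown in this hunk):

# Sketch only: re-enable verbose output for a single debugging run.
graph_config["verbose"] = True   # overrides the False set above

# Hypothetical call, mirroring the usual smart-scraper example layout:
# result = SmartScraperGraph(
#     prompt="List me all the articles",   # hypothetical prompt
#     source="https://example.com",        # hypothetical source
#     config=graph_config,
# ).run()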

scrapegraphai/graphs/abstract_graph.py

Lines changed: 1 addition & 1 deletion
@@ -56,7 +56,7 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
         self.execution_info = None
 
         # Set common configuration parameters
-        self.verbose = True if config is None else config.get("verbose", False)
+        self.verbose = False if config is None else config.get("verbose", False)
         self.headless = True if config is None else config.get(
             "headless", True)
         common_params = {"headless": self.headless,
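This one-line change only flips the fallback used when no config is passed; with a config dict, behaviour is unchanged. A small sketch of the resulting behaviour, mirroring the expression in the changed line:

# Mirrors: self.verbose = False if config is None else config.get("verbose", False)
def resolve_verbose(config):
    return False if config is None else config.get("verbose", False)

assert resolve_verbose(None) is False              # previously True
assert resolve_verbose({"llm": {}}) is False       # key absent -> False (unchanged)
assert resolve_verbose({"verbose": True}) is True  # explicit opt-in (unchanged)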
Lines changed: 105 additions & 0 deletions
@@ -0,0 +1,105 @@
+"""
+Example of custom graph using existing nodes
+"""
+
+import os
+from dotenv import load_dotenv
+from langchain_openai import OpenAIEmbeddings
+from scrapegraphai.models import OpenAI
+from scrapegraphai.graphs import BaseGraph
+from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, SearchInternetNode
+load_dotenv()
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+openai_key = os.getenv("OPENAI_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": openai_key,
+        "model": "gpt-3.5-turbo",
+    },
+}
+
+# ************************************************
+# Define the graph nodes
+# ************************************************
+
+llm_model = OpenAI(graph_config["llm"])
+embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)
+
+search_internet_node = SearchInternetNode(
+    input="user_prompt",
+    output=["url"],
+    node_config={
+        "llm_model": llm_model
+    }
+)
+fetch_node = FetchNode(
+    input="url | local_dir",
+    output=["doc"],
+    node_config={
+        "verbose": True,
+        "headless": True,
+    }
+)
+parse_node = ParseNode(
+    input="doc",
+    output=["parsed_doc"],
+    node_config={
+        "chunk_size": 4096,
+        "verbose": True,
+    }
+)
+rag_node = RAGNode(
+    input="user_prompt & (parsed_doc | doc)",
+    output=["relevant_chunks"],
+    node_config={
+        "llm_model": llm_model,
+        "embedder_model": embedder,
+        "verbose": True,
+    }
+)
+generate_answer_node = GenerateAnswerNode(
+    input="user_prompt & (relevant_chunks | parsed_doc | doc)",
+    output=["answer"],
+    node_config={
+        "llm_model": llm_model,
+        "verbose": True,
+    }
+)
+
+# ************************************************
+# Create the graph by defining the connections
+# ************************************************
+
+graph = BaseGraph(
+    nodes=[
+        search_internet_node,
+        fetch_node,
+        parse_node,
+        rag_node,
+        generate_answer_node,
+    ],
+    edges=[
+        (search_internet_node, fetch_node),
+        (fetch_node, parse_node),
+        (parse_node, rag_node),
+        (rag_node, generate_answer_node)
+    ],
+    entry_point=search_internet_node
+)
+
+# ************************************************
+# Execute the graph
+# ************************************************
+
+result, execution_info = graph.execute({
+    "user_prompt": "List me all the typical Chioggia dishes."
+})
+
+# get the answer from the result
+result = result.get("answer", "No answer found.")
+print(result)
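The new example reads its credentials through load_dotenv() and os.getenv("OPENAI_APIKEY"), so the key has to be available before the graph runs. A minimal pre-flight sketch (OPENAI_APIKEY is the variable name used in the file; loading it from a .env in the working directory is an assumption):

# Sketch only: verify the key the example expects is actually set.
import os
from dotenv import load_dotenv

load_dotenv()  # loads a .env file from the working directory, if present
if not os.getenv("OPENAI_APIKEY"):
    raise SystemExit("OPENAI_APIKEY is not set; add it to .env or the environment")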

scrapegraphai/nodes/robots_node.py

Lines changed: 7 additions & 7 deletions
@@ -2,9 +2,9 @@
 RobotsNode Module
 """
 
-from typing import List
+from typing import List, Optional
 from urllib.parse import urlparse
-from langchain_community.document_loaders import AsyncHtmlLoader
+from langchain_community.document_loaders import AsyncChromiumLoader
 from langchain.prompts import PromptTemplate
 from langchain.output_parsers import CommaSeparatedListOutputParser
 from .base_node import BaseNode
@@ -34,7 +34,7 @@ class RobotsNode(BaseNode):
         node_name (str): The unique identifier name for the node, defaulting to "Robots".
     """
 
-    def __init__(self, input: str, output: List[str], node_config: dict, force_scraping=True,
+    def __init__(self, input: str, output: List[str], node_config: Optional[dict]=None, force_scraping=True,
                  node_name: str = "Robots"):
         super().__init__(node_name, "node", input, output, 1)
 
@@ -93,11 +93,11 @@ def execute(self, state: dict) -> dict:
         else:
             parsed_url = urlparse(source)
             base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
-            loader = AsyncHtmlLoader(f"{base_url}/robots.txt")
+            loader = AsyncChromiumLoader(f"{base_url}/robots.txt")
             document = loader.load()
-            if "ollama" in self.llm_model.model:
-                self.llm_model.model = self.llm_model.model.split("/")[-1]
-                model = self.llm_model.model.split("/")[-1]
+            if "ollama" in self.llm_model.model_name:
+                self.llm_model.model_name = self.llm_model.model_name.split("/")[-1]
+                model = self.llm_model.model_name.split("/")[-1]
 
             else:
                 model = self.llm_model.model_name