Skip to content

Commit 0c15947

Browse files
committed
fix(fetch-node): removed useSoup from default
1 parent 353382b commit 0c15947

File tree

3 files changed

+8
-7
lines changed

3 files changed

+8
-7
lines changed

examples/openai/search_graph_openai.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
graph_config = {
1818
"llm": {
1919
"api_key": openai_key,
20-
"model": "gpt-3.5-turbo",
20+
"model": "gpt-4-turbo",
2121
},
2222
"max_results": 2,
2323
"verbose": True,
@@ -28,7 +28,7 @@
2828
# ************************************************
2929

3030
search_graph = SearchGraph(
31-
prompt="List me the best escursions near Trento",
31+
prompt="List me the heir of the British throne.",
3232
config=graph_config
3333
)
3434

examples/openai/smart_scraper_openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
# ************************************************
3131

3232
smart_scraper_graph = SmartScraperGraph(
33-
prompt="List me all the projects with their description.",
33+
prompt="List me all the links in the page",
3434
# also accepts a string with the already downloaded HTML code
3535
source="https://perinim.github.io/projects/",
3636
config=graph_config

scrapegraphai/nodes/fetch_node.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ def __init__(
5151
False if node_config is None else node_config.get("verbose", False)
5252
)
5353
self.useSoup = (
54-
True if node_config is None else node_config.get("useSoup", True)
54+
False if node_config is None else node_config.get("useSoup", False)
55+
)
5556
self.loader_kwargs = (
5657
{} if node_config is None else node_config.get("loader_kwargs", {})
5758
)
@@ -117,7 +118,7 @@ def execute(self, state):
117118
pass
118119

119120
elif not source.startswith("http"):
120-
compressed_document = [Document(page_content=cleanup_html(source),
121+
compressed_document = [Document(page_content=cleanup_html(data, source),
121122
metadata={"source": "local_dir"}
122123
)]
123124

@@ -127,7 +128,7 @@ def execute(self, state):
127128
cleanedup_html = cleanup_html(response.text, source)
128129
compressed_document = [Document(page_content=cleanedup_html)]
129130
else:
130-
print(f"Failed to retrieve contents from the webpage at url: {url}")
131+
print(f"Failed to retrieve contents from the webpage at url: {source}")
131132

132133
else:
133134
loader_kwargs = {}
@@ -139,7 +140,7 @@ def execute(self, state):
139140

140141
document = loader.load()
141142
compressed_document = [
142-
Document(page_content=cleanup_html(str(document[0].page_content)))
143+
Document(page_content=cleanup_html(str(document[0].page_content), source), metadata={"source": source})
143144
]
144145

145146
state.update({self.output[0]: compressed_document})

0 commit comments

Comments
 (0)