Skip to content

Commit 17dd936

Browse files
committed
test: fix tests for fetch node with proper mock&refactor
1 parent cc9f5cc commit 17dd936

File tree

1 file changed

+50 -83 lines changed

tests/nodes/fetch_node_test.py

Lines changed: 50 additions & 83 deletions
Original file line number | Diff line number | Diff line change
@@ -1,104 +1,71 @@
1-
import os
2-
import pytest
3-
from unittest.mock import patch, MagicMock
41
from scrapegraphai.nodes import FetchNode
2+
from langchain_core.documents import Document
53

6-
def get_file_path(file_name):
7-
"""
8-
Helper function to get the absolute file path.
9-
"""
10-
curr_dir = os.path.dirname(os.path.realpath(__file__))
11-
file_path = os.path.join(curr_dir, file_name)
12-
return file_path
134

14-
@patch('scrapegraphai.nodes.FetchNode.execute')
15-
def test_fetch_node_html(mock_execute):
16-
"""
17-
Test FetchNode with HTML input.
5+
def test_fetch_html(mocker):
6+
title = "ScrapeGraph AI"
7+
link_url = "https://github.com/VinciGit00/Scrapegraph-ai"
8+
img_url = "https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/scrapegraphai_logo.png"
9+
content = f"""
10+
<html>
11+
<head>
12+
<title>{title}</title>
13+
</head>
14+
<body>
15+
<a href="{link_url}">ScrapeGraphAI: You Only Scrape Once</a>
16+
<img src="{img_url}" alt="Scrapegraph-ai Logo">
17+
</body>
18+
</html>
1819
"""
19-
mock_execute.return_value = MagicMock()
20-
fetch_node = FetchNode(
20+
mock_loader_cls = mocker.patch("scrapegraphai.nodes.fetch_node.ChromiumLoader")
21+
mock_loader = mock_loader_cls.return_value
22+
mock_loader.load.return_value = [Document(page_content=content)]
23+
node = FetchNode(
2124
input="url | local_dir",
22-
output=["doc"],
23-
node_config={
24-
"headless": False
25-
}
25+
output=["doc", "links", "images"],
26+
node_config={"headless": False},
2627
)
27-
state = {
28-
"url": "https://twitter.com/home"
29-
}
30-
result = fetch_node.execute(state)
31-
assert result is not None
32-
mock_execute.assert_called_once_with(state)
28+
result = node.execute({"url": "https://scrapegraph-ai.com/example"})
3329

34-
@patch('scrapegraphai.nodes.FetchNode.execute')
35-
def test_fetch_node_json(mock_execute):
36-
"""
37-
Test FetchNode with JSON input.
38-
"""
39-
mock_execute.return_value = MagicMock()
40-
file_path_json = get_file_path("inputs/example.json")
41-
state_json = {
42-
"json": file_path_json
43-
}
44-
fetch_node_json = FetchNode(
30+
mock_loader.load.assert_called_once()
31+
doc = result["doc"][0]
32+
assert title in doc.page_content
33+
assert link_url in result["links"]
34+
assert img_url in result["images"]
35+
36+
37+
def test_fetch_json():
38+
node = FetchNode(
4539
input="json",
4640
output=["doc"],
4741
)
48-
result_json = fetch_node_json.execute(state_json)
49-
assert result_json is not None
50-
mock_execute.assert_called_once_with(state_json)
42+
result = node.execute({"json": "tests/nodes/inputs/example.json"})
43+
assert result is not None
5144

52-
@patch('scrapegraphai.nodes.FetchNode.execute')
53-
def test_fetch_node_xml(mock_execute):
54-
"""
55-
Test FetchNode with XML input.
56-
"""
57-
mock_execute.return_value = MagicMock()
58-
file_path_xml = get_file_path("inputs/books.xml")
59-
state_xml = {
60-
"xml": file_path_xml
61-
}
62-
fetch_node_xml = FetchNode(
45+
46+
def test_fetch_xml():
47+
node = FetchNode(
6348
input="xml",
6449
output=["doc"],
6550
)
66-
result_xml = fetch_node_xml.execute(state_xml)
67-
assert result_xml is not None
68-
mock_execute.assert_called_once_with(state_xml)
51+
result = node.execute({"xml": "tests/nodes/inputs/books.xml"})
52+
assert result is not None
6953

70-
@patch('scrapegraphai.nodes.FetchNode.execute')
71-
def test_fetch_node_csv(mock_execute):
72-
"""
73-
Test FetchNode with CSV input.
74-
"""
75-
mock_execute.return_value = MagicMock()
76-
file_path_csv = get_file_path("inputs/username.csv")
77-
state_csv = {
78-
"csv": file_path_csv
79-
}
80-
fetch_node_csv = FetchNode(
54+
55+
def test_fetch_csv():
56+
node = FetchNode(
8157
input="csv",
8258
output=["doc"],
8359
)
84-
result_csv = fetch_node_csv.execute(state_csv)
85-
assert result_csv is not None
86-
mock_execute.assert_called_once_with(state_csv)
60+
result = node.execute({"csv": "tests/nodes/inputs/username.csv"})
61+
assert result is not None
8762

88-
@patch('scrapegraphai.nodes.FetchNode.execute')
89-
def test_fetch_node_txt(mock_execute):
90-
"""
91-
Test FetchNode with TXT input.
92-
"""
93-
mock_execute.return_value = MagicMock()
94-
file_path_txt = get_file_path("inputs/plain_html_example.txt")
95-
state_txt = {
96-
"txt": file_path_txt
97-
}
98-
fetch_node_txt = FetchNode(
63+
64+
def test_fetch_txt():
65+
node = FetchNode(
9966
input="txt",
100-
output=["doc"],
67+
output=["doc", "links", "images"],
10168
)
102-
result_txt = fetch_node_txt.execute(state_txt)
103-
assert result_txt is not None
104-
mock_execute.assert_called_once_with(state_txt)
69+
with open("tests/nodes/inputs/plain_html_example.txt") as f:
70+
result = node.execute({"txt": f.read()})
71+
assert result is not None

0 commit comments

Comments
 (0)