|
1 |
| -import os |
2 |
| -import pytest |
3 |
| -from unittest.mock import patch, MagicMock |
4 | 1 | from scrapegraphai.nodes import FetchNode
|
| 2 | +from langchain_core.documents import Document |
5 | 3 |
|
6 |
| -def get_file_path(file_name): |
7 |
| - """ |
8 |
| - Helper function to get the absolute file path. |
9 |
| - """ |
10 |
| - curr_dir = os.path.dirname(os.path.realpath(__file__)) |
11 |
| - file_path = os.path.join(curr_dir, file_name) |
12 |
| - return file_path |
13 | 4 |
|
14 |
| -@patch('scrapegraphai.nodes.FetchNode.execute') |
15 |
| -def test_fetch_node_html(mock_execute): |
16 |
| - """ |
17 |
| - Test FetchNode with HTML input. |
| 5 | +def test_fetch_html(mocker): |
| 6 | + title = "ScrapeGraph AI" |
| 7 | + link_url = "https://github.com/VinciGit00/Scrapegraph-ai" |
| 8 | + img_url = "https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/scrapegraphai_logo.png" |
| 9 | + content = f""" |
| 10 | + <html> |
| 11 | + <head> |
| 12 | + <title>{title}</title> |
| 13 | + </head> |
| 14 | + <body> |
| 15 | + <a href="{link_url}">ScrapeGraphAI: You Only Scrape Once</a> |
| 16 | + <img src="{img_url}" alt="Scrapegraph-ai Logo"> |
| 17 | + </body> |
| 18 | + </html> |
18 | 19 | """
|
19 |
| - mock_execute.return_value = MagicMock() |
20 |
| - fetch_node = FetchNode( |
| 20 | + mock_loader_cls = mocker.patch("scrapegraphai.nodes.fetch_node.ChromiumLoader") |
| 21 | + mock_loader = mock_loader_cls.return_value |
| 22 | + mock_loader.load.return_value = [Document(page_content=content)] |
| 23 | + node = FetchNode( |
21 | 24 | input="url | local_dir",
|
22 |
| - output=["doc"], |
23 |
| - node_config={ |
24 |
| - "headless": False |
25 |
| - } |
| 25 | + output=["doc", "links", "images"], |
| 26 | + node_config={"headless": False}, |
26 | 27 | )
|
27 |
| - state = { |
28 |
| - "url": "https://twitter.com/home" |
29 |
| - } |
30 |
| - result = fetch_node.execute(state) |
31 |
| - assert result is not None |
32 |
| - mock_execute.assert_called_once_with(state) |
| 28 | + result = node.execute({"url": "https://scrapegraph-ai.com/example"}) |
33 | 29 |
|
34 |
| -@patch('scrapegraphai.nodes.FetchNode.execute') |
35 |
| -def test_fetch_node_json(mock_execute): |
36 |
| - """ |
37 |
| - Test FetchNode with JSON input. |
38 |
| - """ |
39 |
| - mock_execute.return_value = MagicMock() |
40 |
| - file_path_json = get_file_path("inputs/example.json") |
41 |
| - state_json = { |
42 |
| - "json": file_path_json |
43 |
| - } |
44 |
| - fetch_node_json = FetchNode( |
| 30 | + mock_loader.load.assert_called_once() |
| 31 | + doc = result["doc"][0] |
| 32 | + assert title in doc.page_content |
| 33 | + assert link_url in result["links"] |
| 34 | + assert img_url in result["images"] |
| 35 | + |
| 36 | + |
| 37 | +def test_fetch_json(): |
| 38 | + node = FetchNode( |
45 | 39 | input="json",
|
46 | 40 | output=["doc"],
|
47 | 41 | )
|
48 |
| - result_json = fetch_node_json.execute(state_json) |
49 |
| - assert result_json is not None |
50 |
| - mock_execute.assert_called_once_with(state_json) |
| 42 | + result = node.execute({"json": "tests/nodes/inputs/example.json"}) |
| 43 | + assert result is not None |
51 | 44 |
|
52 |
| -@patch('scrapegraphai.nodes.FetchNode.execute') |
53 |
| -def test_fetch_node_xml(mock_execute): |
54 |
| - """ |
55 |
| - Test FetchNode with XML input. |
56 |
| - """ |
57 |
| - mock_execute.return_value = MagicMock() |
58 |
| - file_path_xml = get_file_path("inputs/books.xml") |
59 |
| - state_xml = { |
60 |
| - "xml": file_path_xml |
61 |
| - } |
62 |
| - fetch_node_xml = FetchNode( |
| 45 | + |
| 46 | +def test_fetch_xml(): |
| 47 | + node = FetchNode( |
63 | 48 | input="xml",
|
64 | 49 | output=["doc"],
|
65 | 50 | )
|
66 |
| - result_xml = fetch_node_xml.execute(state_xml) |
67 |
| - assert result_xml is not None |
68 |
| - mock_execute.assert_called_once_with(state_xml) |
| 51 | + result = node.execute({"xml": "tests/nodes/inputs/books.xml"}) |
| 52 | + assert result is not None |
69 | 53 |
|
70 |
| -@patch('scrapegraphai.nodes.FetchNode.execute') |
71 |
| -def test_fetch_node_csv(mock_execute): |
72 |
| - """ |
73 |
| - Test FetchNode with CSV input. |
74 |
| - """ |
75 |
| - mock_execute.return_value = MagicMock() |
76 |
| - file_path_csv = get_file_path("inputs/username.csv") |
77 |
| - state_csv = { |
78 |
| - "csv": file_path_csv |
79 |
| - } |
80 |
| - fetch_node_csv = FetchNode( |
| 54 | + |
| 55 | +def test_fetch_csv(): |
| 56 | + node = FetchNode( |
81 | 57 | input="csv",
|
82 | 58 | output=["doc"],
|
83 | 59 | )
|
84 |
| - result_csv = fetch_node_csv.execute(state_csv) |
85 |
| - assert result_csv is not None |
86 |
| - mock_execute.assert_called_once_with(state_csv) |
| 60 | + result = node.execute({"csv": "tests/nodes/inputs/username.csv"}) |
| 61 | + assert result is not None |
87 | 62 |
|
88 |
| -@patch('scrapegraphai.nodes.FetchNode.execute') |
89 |
| -def test_fetch_node_txt(mock_execute): |
90 |
| - """ |
91 |
| - Test FetchNode with TXT input. |
92 |
| - """ |
93 |
| - mock_execute.return_value = MagicMock() |
94 |
| - file_path_txt = get_file_path("inputs/plain_html_example.txt") |
95 |
| - state_txt = { |
96 |
| - "txt": file_path_txt |
97 |
| - } |
98 |
| - fetch_node_txt = FetchNode( |
| 63 | + |
| 64 | +def test_fetch_txt(): |
| 65 | + node = FetchNode( |
99 | 66 | input="txt",
|
100 |
| - output=["doc"], |
| 67 | + output=["doc", "links", "images"], |
101 | 68 | )
|
102 |
| - result_txt = fetch_node_txt.execute(state_txt) |
103 |
| - assert result_txt is not None |
104 |
| - mock_execute.assert_called_once_with(state_txt) |
| 69 | + with open("tests/nodes/inputs/plain_html_example.txt") as f: |
| 70 | + result = node.execute({"txt": f.read()}) |
| 71 | + assert result is not None |
0 commit comments