|
1 |
| -""" |
2 |
| -Module for scraping json documents |
| 1 | +""" |
| 2 | +Module for scraping JSON documents |
3 | 3 | """
|
4 | 4 | import os
|
| 5 | +import json |
5 | 6 | import pytest
|
| 7 | + |
6 | 8 | from scrapegraphai.graphs import JSONScraperGraph
|
7 | 9 |
|
# Load the graph configurations from a JSON file.
# NOTE: the relative path is resolved against the current working directory,
# so the tests must be started from the directory that holds config.json.
CONFIG_FILE = "config.json"
# JSON text is UTF-8; pass the encoding explicitly (as the sample_json fixture
# does) instead of relying on the platform's locale default.
with open(CONFIG_FILE, "r", encoding="utf-8") as f:
    CONFIG = json.load(f)
8 | 14 |
|
# Fixture providing the raw text of the sample JSON document
@pytest.fixture
def sample_json():
    """
    Return the contents of the example.json file stored in the "inputs"
    directory next to this module, decoded as UTF-8
    """
    module_dir = os.path.dirname(__file__)
    json_path = os.path.join(module_dir, "inputs", "example.json")
    with open(json_path, "r", encoding="utf-8") as handle:
        return handle.read()
|
22 | 25 |
|
23 |
| - |
24 |
# Parametrized fixture: each entry of CONFIG["graph_configs"] yields one run
@pytest.fixture(params=CONFIG["graph_configs"])
def graph_config(request):
    """
    Return the graph configuration selected for the current parametrized run
    """
    current_config = request.param
    return current_config
43 | 33 |
|
44 |
# Test function for the scraping pipeline
def test_scraping_pipeline(sample_json, graph_config):
    """
    Run JSONScraperGraph on the sample document with the given configuration
    and compare the returned titles with the expected ones
    """
    # Placeholder values: replace with the titles expected from the sample file
    expected_titles = ["Title 1", "Title 2", "Title 3"]

    scraper = JSONScraperGraph(
        prompt="List me all the titles",
        source=sample_json,
        config=graph_config,
    )
    result = scraper.run()

    assert result is not None
    assert isinstance(result, list)
    # Order is not significant, so both sides are sorted before comparing
    assert sorted(result) == sorted(expected_titles)
0 commit comments