Skip to content

Commit 261c4fc

Browse files
authored
Merge pull request #352 from tejhande/patch-1
test: Enhance JSON scraping pipeline test
2 parents: c17daca + d845a1b; commit: 261c4fc

File tree

1 file changed

+24
-30
lines changed

1 file changed

+24
-30
lines changed

tests/graphs/scrape_json_ollama.py

Lines changed: 24 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1,56 +1,50 @@
1-
"""
2-
Module for scraping json documents
1+
"""
2+
Module for scraping JSON documents
33
"""
44
import os
5+
import json
56
import pytest
7+
68
from scrapegraphai.graphs import JSONScraperGraph
79

10+
# Load configuration from a JSON file
11+
CONFIG_FILE = "config.json"
12+
with open(CONFIG_FILE, "r") as f:
13+
CONFIG = json.load(f)
814

15+
# Fixture to read the sample JSON file
916
@pytest.fixture
1017
def sample_json():
1118
"""
12-
Example of text
19+
Read the sample JSON file
1320
"""
14-
file_name = "inputs/example.json"
15-
curr_dir = os.path.dirname(os.path.realpath(__file__))
16-
file_path = os.path.join(curr_dir, file_name)
17-
18-
with open(file_path, 'r', encoding="utf-8") as file:
21+
file_path = os.path.join(os.path.dirname(__file__), "inputs", "example.json")
22+
with open(file_path, "r", encoding="utf-8") as file:
1923
text = file.read()
20-
2124
return text
2225

23-
24-
@pytest.fixture
25-
def graph_config():
26+
# Parametrized fixture to load graph configurations
27+
@pytest.fixture(params=CONFIG["graph_configs"])
28+
def graph_config(request):
2629
"""
27-
Configuration of the graph
30+
Load graph configuration
2831
"""
29-
return {
30-
"llm": {
31-
"model": "ollama/mistral",
32-
"temperature": 0,
33-
"format": "json",
34-
"base_url": "http://localhost:11434",
35-
},
36-
"embeddings": {
37-
"model": "ollama/nomic-embed-text",
38-
"temperature": 0,
39-
"base_url": "http://localhost:11434",
40-
}
41-
}
42-
32+
return request.param
4333

44-
def test_scraping_pipeline(sample_json: str, graph_config: dict):
34+
# Test function for the scraping pipeline
35+
def test_scraping_pipeline(sample_json, graph_config):
4536
"""
46-
Start of the scraping pipeline
37+
Test the scraping pipeline
4738
"""
39+
expected_titles = ["Title 1", "Title 2", "Title 3"] # Replace with expected titles
40+
4841
smart_scraper_graph = JSONScraperGraph(
4942
prompt="List me all the titles",
5043
source=sample_json,
5144
config=graph_config
5245
)
53-
5446
result = smart_scraper_graph.run()
5547

5648
assert result is not None
49+
assert isinstance(result, list)
50+
assert sorted(result) == sorted(expected_titles)

0 commit comments

Comments (0)