Skip to content

Commit 9daa5a6

Browse files
authored
Merge pull request #126 from stac-utils/add-pydantic
- Added --pydantic option for validating STAC objects using stac-pydantic models, providing enhanced type checking and validation
- Improved bbox validation output to show detailed information about mismatches between bbox and geometry bounds, including which specific coordinates differ and by how much
2 parents 9429eeb + 5c2d2ef commit 9daa5a6

File tree

7 files changed

+177
-28
lines changed

7 files changed

+177
-28
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/)
1111
- Added sponsors and supporters section with logos ([#122](https://github.com/stac-utils/stac-check/pull/122))
1212
- Added check to verify that bbox matches item's polygon geometry ([#123](https://github.com/stac-utils/stac-check/pull/123))
1313
- Added configuration documentation to README ([#124](https://github.com/stac-utils/stac-check/pull/124))
14+
- Added `--pydantic` option for validating STAC objects using stac-pydantic models, providing enhanced type checking and validation ([#126](https://github.com/stac-utils/stac-check/pull/126))
15+
16+
### Enhanced
17+
18+
- Improved bbox validation output to show detailed information about mismatches between bbox and geometry bounds, including which specific coordinates differ and by how much ([#126](https://github.com/stac-utils/stac-check/pull/126))
1419

1520
### Updated
1621

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ Options:
8686
(enabled by default).
8787
--header KEY VALUE HTTP header to include in the requests. Can be used
8888
multiple times.
89+
--pydantic Use stac-pydantic for enhanced validation with Pydantic models.
8990
--help Show this message and exit.
9091
```
9192

sample_files/1.0.0/bad-item.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
-122.59750209,
99
37.48803556,
1010
-122.2880486,
11-
37.613537207
11+
37.613531207
1212
],
1313
"geometry": {
1414
"type": "Polygon",

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from setuptools import find_packages, setup
55

6-
__version__ = "1.6.0"
6+
__version__ = "1.7.0"
77

88
with open("README.md", "r") as fh:
99
long_description = fh.read()
@@ -20,7 +20,7 @@
2020
"requests>=2.32.3",
2121
"jsonschema>=4.23.0",
2222
"click>=8.1.8",
23-
"stac-validator>=3.6.0",
23+
"stac-validator[pydantic]>=3.7.0",
2424
"PyYAML",
2525
"python-dotenv",
2626
],

stac_check/cli.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,13 @@ def intro_message(linter: Linter) -> None:
9191
f"Validator: stac-validator {linter.validator_version}", bg="blue", fg="white"
9292
)
9393

94+
# Always show validation method
95+
validation_method = (
96+
"Pydantic" if hasattr(linter, "pydantic") and linter.pydantic else "JSONSchema"
97+
)
98+
click.secho()
99+
click.secho(f"Validation method: {validation_method}", bg="yellow", fg="black")
100+
94101
click.secho()
95102

96103

@@ -111,7 +118,17 @@ def cli_message(linter: Linter) -> None:
111118

112119
""" schemas validated for core object """
113120
click.secho()
114-
if len(linter.schema) > 0:
121+
122+
# Determine if we're using Pydantic validation
123+
using_pydantic = hasattr(linter, "pydantic") and linter.pydantic
124+
125+
# For Pydantic validation, always show the appropriate schema model
126+
if using_pydantic:
127+
click.secho("Schemas validated: ", fg="blue")
128+
asset_type = linter.asset_type.capitalize() if linter.asset_type else "Item"
129+
click.secho(f" stac-pydantic {asset_type} model")
130+
# For JSONSchema validation or when schemas are available
131+
elif len(linter.schema) > 0:
115132
click.secho("Schemas validated: ", fg="blue")
116133
for schema in linter.schema:
117134
click.secho(f" {schema}")
@@ -194,10 +211,15 @@ def cli_message(linter: Linter) -> None:
194211
multiple=True,
195212
help="HTTP header to include in the requests. Can be used multiple times.",
196213
)
214+
@click.option(
215+
"--pydantic",
216+
is_flag=True,
217+
help="Use stac-pydantic for enhanced validation with Pydantic models.",
218+
)
197219
@click.command()
198220
@click.argument("file")
199221
@click.version_option(version=importlib.metadata.distribution("stac-check").version)
200-
def main(file, recursive, max_depth, assets, links, no_assets_urls, header):
222+
def main(file, recursive, max_depth, assets, links, no_assets_urls, header, pydantic):
201223
linter = Linter(
202224
file,
203225
assets=assets,
@@ -206,6 +228,7 @@ def main(file, recursive, max_depth, assets, links, no_assets_urls, header):
206228
max_depth=max_depth,
207229
assets_open_urls=not no_assets_urls,
208230
headers=dict(header),
231+
pydantic=pydantic,
209232
)
210233
intro_message(linter)
211234
if recursive > 0:

stac_check/lint.py

Lines changed: 83 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import json
44
import os
55
from dataclasses import dataclass, field
6-
from typing import Any, Dict, List, Optional, Union
6+
from typing import Any, Dict, List, Optional, Tuple, Union
77

88
import requests
99
import yaml
@@ -27,6 +27,7 @@ class Linter:
2727
max_depth (Optional[int], optional): An optional integer indicating the maximum depth to validate recursively. Defaults to None.
2828
assets_open_urls (bool): Whether to open assets URLs when validating assets. Defaults to True.
2929
headers (dict): HTTP headers to include in the requests.
30+
pydantic (bool, optional): A boolean value indicating whether to use pydantic validation. Defaults to False.
3031
3132
Attributes:
3233
data (dict): A dictionary representing the STAC JSON file.
@@ -122,14 +123,15 @@ def check_summaries(self) -> bool:
122123
Creates a message with best practices recommendations for the STAC JSON file.
123124
"""
124125

125-
item: Union[str, dict] # url, file name, or dictionary
126+
item: Union[str, Dict]
126127
config_file: Optional[str] = None
127128
assets: bool = False
128129
links: bool = False
129130
recursive: bool = False
130131
max_depth: Optional[int] = None
131132
assets_open_urls: bool = True
132-
headers: dict = field(default_factory=dict)
133+
headers: Dict = field(default_factory=dict)
134+
pydantic: bool = False
133135

134136
def __post_init__(self):
135137
self.data = self.load_data(self.item)
@@ -270,16 +272,21 @@ def validate_file(self, file: Union[str, dict]) -> Dict[str, Any]:
270272
assets=self.assets,
271273
assets_open_urls=self.assets_open_urls,
272274
headers=self.headers,
275+
pydantic=self.pydantic,
273276
)
274277
stac.run()
275278
elif isinstance(file, dict):
276279
stac = StacValidate(
277-
assets_open_urls=self.assets_open_urls, headers=self.headers
280+
assets_open_urls=self.assets_open_urls,
281+
headers=self.headers,
282+
pydantic=self.pydantic,
278283
)
279284
stac.validate_dict(file)
280285
else:
281286
raise ValueError("Input must be a file path or STAC dictionary.")
282-
return stac.message[0]
287+
288+
message = stac.message[0]
289+
return message
283290

284291
def recursive_validation(self, file: Union[str, Dict[str, Any]]) -> str:
285292
"""Recursively validate a STAC item or catalog file and its child items.
@@ -302,6 +309,7 @@ def recursive_validation(self, file: Union[str, Dict[str, Any]]) -> str:
302309
max_depth=self.max_depth,
303310
assets_open_urls=self.assets_open_urls,
304311
headers=self.headers,
312+
pydantic=self.pydantic,
305313
)
306314
stac.run()
307315
else:
@@ -310,6 +318,7 @@ def recursive_validation(self, file: Union[str, Dict[str, Any]]) -> str:
310318
max_depth=self.max_depth,
311319
assets_open_urls=self.assets_open_urls,
312320
headers=self.headers,
321+
pydantic=self.pydantic,
313322
)
314323
stac.validate_dict(file)
315324
return stac.message
@@ -454,16 +463,20 @@ def check_geometry_null(self) -> bool:
454463
else:
455464
return False
456465

457-
def check_bbox_matches_geometry(self) -> bool:
466+
def check_bbox_matches_geometry(
467+
self,
468+
) -> Union[bool, Tuple[bool, List[float], List[float], List[float]]]:
458469
"""Checks if the bbox of a STAC item matches its geometry.
459470
460471
This function verifies that the bounding box (bbox) accurately represents
461472
the minimum bounding rectangle of the item's geometry. It only applies to
462473
items with non-null geometry of type Polygon or MultiPolygon.
463474
464475
Returns:
465-
bool: True if the bbox matches the geometry or if the check is not applicable
466-
(e.g., null geometry or non-polygon type). False if there's a mismatch.
476+
Union[bool, Tuple[bool, List[float], List[float], List[float]]]:
477+
- True if the bbox matches the geometry or if the check is not applicable
478+
(e.g., null geometry or non-polygon type).
479+
- When there's a mismatch: a tuple containing (False, calculated_bbox, actual_bbox, differences)
467480
"""
468481
# Skip check if geometry is null or bbox is not present
469482
if (
@@ -504,11 +517,14 @@ def check_bbox_matches_geometry(self) -> bool:
504517

505518
calc_bbox = [min(lons), min(lats), max(lons), max(lats)]
506519

507-
# Allow for small floating point differences (epsilon)
508-
epsilon = 1e-8
509-
for i in range(4):
510-
if abs(bbox[i] - calc_bbox[i]) > epsilon:
511-
return False
520+
# Allow for differences that would be invisible when rounded to 6 decimal places
521+
# 1e-6 would be exactly at the 6th decimal place, so use 5e-7 to be just under that threshold
522+
epsilon = 5e-7
523+
differences = [abs(bbox[i] - calc_bbox[i]) for i in range(4)]
524+
525+
if any(diff > epsilon for diff in differences):
526+
# Return False along with the calculated bbox, actual bbox, and the differences
527+
return (False, calc_bbox, bbox, differences)
512528

513529
return True
514530

@@ -675,12 +691,60 @@ def create_best_practices_dict(self) -> Dict:
675691
best_practices_dict["null_geometry"] = [msg_1]
676692

677693
# best practices - check if bbox matches geometry
678-
if (
679-
not self.check_bbox_matches_geometry()
680-
and config.get("check_bbox_geometry_match", True) == True
681-
):
682-
msg_1 = "The bbox field does not match the bounds of the geometry. The bbox should be the minimum bounding rectangle of the geometry."
683-
best_practices_dict["bbox_geometry_mismatch"] = [msg_1]
694+
bbox_check_result = self.check_bbox_matches_geometry()
695+
bbox_mismatch = False
696+
697+
if isinstance(bbox_check_result, tuple):
698+
bbox_mismatch = not bbox_check_result[0]
699+
else:
700+
bbox_mismatch = not bbox_check_result
701+
702+
if bbox_mismatch and config.get("check_bbox_geometry_match", True) == True:
703+
if isinstance(bbox_check_result, tuple):
704+
# Unpack the result
705+
_, calc_bbox, actual_bbox, differences = bbox_check_result
706+
707+
# Format the bbox values for display
708+
calc_bbox_str = ", ".join([f"{v:.6f}" for v in calc_bbox])
709+
actual_bbox_str = ", ".join([f"{v:.6f}" for v in actual_bbox])
710+
711+
# Create a more detailed message about which coordinates differ
712+
coordinate_labels = [
713+
"min longitude",
714+
"min latitude",
715+
"max longitude",
716+
"max latitude",
717+
]
718+
mismatch_details = []
719+
720+
# Use the same epsilon threshold as in check_bbox_matches_geometry
721+
epsilon = 5e-7
722+
723+
for i, (diff, label) in enumerate(zip(differences, coordinate_labels)):
724+
if diff > epsilon:
725+
mismatch_details.append(
726+
f"{label}: calculated={calc_bbox[i]:.6f}, actual={actual_bbox[i]:.6f}, diff={diff:.7f}"
727+
)
728+
729+
msg_1 = "The bbox field does not match the bounds of the geometry. The bbox should be the minimum bounding rectangle of the geometry."
730+
msg_2 = f"Calculated bbox from geometry: [{calc_bbox_str}]"
731+
msg_3 = f"Actual bbox in metadata: [{actual_bbox_str}]"
732+
733+
messages = [msg_1, msg_2, msg_3]
734+
if mismatch_details:
735+
messages.append("Mismatched coordinates:")
736+
messages.extend(mismatch_details)
737+
else:
738+
# If we got here but there are no visible differences at 6 decimal places,
739+
# add a note explaining that the differences are too small to matter
740+
messages.append(
741+
"Note: The differences are too small to be visible at 6 decimal places and can be ignored."
742+
)
743+
744+
best_practices_dict["bbox_geometry_mismatch"] = messages
745+
else:
746+
msg_1 = "The bbox field does not match the bounds of the geometry. The bbox should be the minimum bounding rectangle of the geometry."
747+
best_practices_dict["bbox_geometry_mismatch"] = [msg_1]
684748

685749
# check to see if there are too many links
686750
if (

tests/test_lint.py

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ def test_bbox_matches_geometry():
282282
# Test with matching bbox and geometry
283283
file = "sample_files/1.0.0/core-item.json"
284284
linter = Linter(file)
285-
assert linter.check_bbox_matches_geometry() == True
285+
assert linter.check_bbox_matches_geometry() is True
286286

287287
# Test with mismatched bbox and geometry
288288
mismatched_item = {
@@ -306,7 +306,30 @@ def test_bbox_matches_geometry():
306306
"properties": {"datetime": "2020-12-11T22:38:32.125Z"},
307307
}
308308
linter = Linter(mismatched_item)
309-
assert linter.check_bbox_matches_geometry() == False
309+
result = linter.check_bbox_matches_geometry()
310+
311+
# Check that the result is a tuple and the first element is False
312+
assert isinstance(result, tuple)
313+
assert result[0] is False
314+
315+
# Check that the tuple contains the expected elements (calculated bbox, actual bbox, differences)
316+
assert len(result) == 4
317+
calc_bbox, actual_bbox, differences = result[1], result[2], result[3]
318+
319+
# Verify the calculated bbox matches the geometry coordinates
320+
assert calc_bbox == [
321+
172.91173669923782,
322+
1.3438851951615003,
323+
172.95469614953714,
324+
1.3690476620161975,
325+
]
326+
327+
# Verify the actual bbox is what we provided
328+
assert actual_bbox == [100.0, 0.0, 105.0, 1.0]
329+
330+
# Verify the differences are calculated correctly
331+
expected_differences = [abs(actual_bbox[i] - calc_bbox[i]) for i in range(4)]
332+
assert differences == expected_differences
310333

311334
# Test with null geometry (should return True as check is not applicable)
312335
null_geom_item = {
@@ -318,7 +341,7 @@ def test_bbox_matches_geometry():
318341
"properties": {"datetime": "2020-12-11T22:38:32.125Z"},
319342
}
320343
linter = Linter(null_geom_item)
321-
assert linter.check_bbox_matches_geometry() == True
344+
assert linter.check_bbox_matches_geometry() is True
322345

323346
# Test with missing bbox (should return True as check is not applicable)
324347
no_bbox_item = {
@@ -340,7 +363,7 @@ def test_bbox_matches_geometry():
340363
"properties": {"datetime": "2020-12-11T22:38:32.125Z"},
341364
}
342365
linter = Linter(no_bbox_item)
343-
assert linter.check_bbox_matches_geometry() == True
366+
assert linter.check_bbox_matches_geometry() is True
344367

345368

346369
def test_bloated_item():
@@ -633,3 +656,36 @@ def test_lint_assets_no_links():
633656
"request_invalid": [],
634657
},
635658
}
659+
660+
661+
def test_lint_pydantic_validation_valid():
662+
"""Test pydantic validation with a valid STAC item."""
663+
file = "sample_files/1.0.0/core-item.json"
664+
linter = Linter(file, pydantic=True)
665+
666+
assert linter.valid_stac == True
667+
assert linter.asset_type == "ITEM"
668+
assert "stac-pydantic Item model" in linter.message["schema"]
669+
assert linter.message["validation_method"] == "pydantic"
670+
671+
672+
def test_lint_pydantic_validation_invalid():
673+
"""Test pydantic validation with an invalid STAC item (missing required fields)."""
674+
file = "sample_files/1.0.0/bad-item.json"
675+
linter = Linter(file, pydantic=True)
676+
677+
assert linter.valid_stac == False
678+
assert "PydanticValidationError" in linter.message["error_type"]
679+
assert "id: Field required" in linter.message["error_message"]
680+
assert linter.message["validation_method"] == "pydantic"
681+
682+
683+
def test_lint_pydantic_validation_recursive():
684+
"""Test pydantic validation with recursive option."""
685+
file = "sample_files/1.0.0/collection.json"
686+
linter = Linter(file, recursive=True, max_depth=1, pydantic=True)
687+
688+
assert linter.valid_stac == True
689+
assert linter.asset_type == "COLLECTION"
690+
assert "stac-pydantic Collection model" in linter.message["schema"]
691+
assert linter.message["validation_method"] == "pydantic"

0 commit comments

Comments
 (0)