3
3
import json
4
4
import os
5
5
from dataclasses import dataclass , field
6
- from typing import Any , Dict , List , Optional , Union
6
+ from typing import Any , Dict , List , Optional , Tuple , Union
7
7
8
8
import requests
9
9
import yaml
@@ -27,6 +27,7 @@ class Linter:
27
27
max_depth (Optional[int], optional): An optional integer indicating the maximum depth to validate recursively. Defaults to None.
28
28
assets_open_urls (bool): Whether to open assets URLs when validating assets. Defaults to True.
29
29
headers (dict): HTTP headers to include in the requests.
30
+ pydantic (bool, optional): A boolean value indicating whether to use pydantic validation. Defaults to False.
30
31
31
32
Attributes:
32
33
data (dict): A dictionary representing the STAC JSON file.
@@ -122,14 +123,15 @@ def check_summaries(self) -> bool:
122
123
Creates a message with best practices recommendations for the STAC JSON file.
123
124
"""
124
125
125
- item : Union [str , dict ] # url, file name, or dictionary
126
+ item : Union [str , Dict ]
126
127
config_file : Optional [str ] = None
127
128
assets : bool = False
128
129
links : bool = False
129
130
recursive : bool = False
130
131
max_depth : Optional [int ] = None
131
132
assets_open_urls : bool = True
132
- headers : dict = field (default_factory = dict )
133
+ headers : Dict = field (default_factory = dict )
134
+ pydantic : bool = False
133
135
134
136
def __post_init__ (self ):
135
137
self .data = self .load_data (self .item )
@@ -270,16 +272,21 @@ def validate_file(self, file: Union[str, dict]) -> Dict[str, Any]:
270
272
assets = self .assets ,
271
273
assets_open_urls = self .assets_open_urls ,
272
274
headers = self .headers ,
275
+ pydantic = self .pydantic ,
273
276
)
274
277
stac .run ()
275
278
elif isinstance (file , dict ):
276
279
stac = StacValidate (
277
- assets_open_urls = self .assets_open_urls , headers = self .headers
280
+ assets_open_urls = self .assets_open_urls ,
281
+ headers = self .headers ,
282
+ pydantic = self .pydantic ,
278
283
)
279
284
stac .validate_dict (file )
280
285
else :
281
286
raise ValueError ("Input must be a file path or STAC dictionary." )
282
- return stac .message [0 ]
287
+
288
+ message = stac .message [0 ]
289
+ return message
283
290
284
291
def recursive_validation (self , file : Union [str , Dict [str , Any ]]) -> str :
285
292
"""Recursively validate a STAC item or catalog file and its child items.
@@ -302,6 +309,7 @@ def recursive_validation(self, file: Union[str, Dict[str, Any]]) -> str:
302
309
max_depth = self .max_depth ,
303
310
assets_open_urls = self .assets_open_urls ,
304
311
headers = self .headers ,
312
+ pydantic = self .pydantic ,
305
313
)
306
314
stac .run ()
307
315
else :
@@ -310,6 +318,7 @@ def recursive_validation(self, file: Union[str, Dict[str, Any]]) -> str:
310
318
max_depth = self .max_depth ,
311
319
assets_open_urls = self .assets_open_urls ,
312
320
headers = self .headers ,
321
+ pydantic = self .pydantic ,
313
322
)
314
323
stac .validate_dict (file )
315
324
return stac .message
@@ -454,16 +463,20 @@ def check_geometry_null(self) -> bool:
454
463
else :
455
464
return False
456
465
457
- def check_bbox_matches_geometry (self ) -> bool :
466
+ def check_bbox_matches_geometry (
467
+ self ,
468
+ ) -> Union [bool , Tuple [bool , List [float ], List [float ], List [float ]]]:
458
469
"""Checks if the bbox of a STAC item matches its geometry.
459
470
460
471
This function verifies that the bounding box (bbox) accurately represents
461
472
the minimum bounding rectangle of the item's geometry. It only applies to
462
473
items with non-null geometry of type Polygon or MultiPolygon.
463
474
464
475
Returns:
465
- bool: True if the bbox matches the geometry or if the check is not applicable
466
- (e.g., null geometry or non-polygon type). False if there's a mismatch.
476
+ Union[bool, Tuple[bool, List[float], List[float], List[float]]]:
477
+ - True if the bbox matches the geometry or if the check is not applicable
478
+ (e.g., null geometry or non-polygon type).
479
+ - When there's a mismatch: a tuple containing (False, calculated_bbox, actual_bbox, differences)
467
480
"""
468
481
# Skip check if geometry is null or bbox is not present
469
482
if (
@@ -504,11 +517,14 @@ def check_bbox_matches_geometry(self) -> bool:
504
517
505
518
calc_bbox = [min (lons ), min (lats ), max (lons ), max (lats )]
506
519
507
- # Allow for small floating point differences (epsilon)
508
- epsilon = 1e-8
509
- for i in range (4 ):
510
- if abs (bbox [i ] - calc_bbox [i ]) > epsilon :
511
- return False
520
+ # Allow for differences that would be invisible when rounded to 6 decimal places
521
+ # 1e-6 would be exactly at the 6th decimal place, so use 5e-7 to be just under that threshold
522
+ epsilon = 5e-7
523
+ differences = [abs (bbox [i ] - calc_bbox [i ]) for i in range (4 )]
524
+
525
+ if any (diff > epsilon for diff in differences ):
526
+ # Return False along with the calculated bbox, actual bbox, and the differences
527
+ return (False , calc_bbox , bbox , differences )
512
528
513
529
return True
514
530
@@ -675,12 +691,60 @@ def create_best_practices_dict(self) -> Dict:
675
691
best_practices_dict ["null_geometry" ] = [msg_1 ]
676
692
677
693
# best practices - check if bbox matches geometry
678
- if (
679
- not self .check_bbox_matches_geometry ()
680
- and config .get ("check_bbox_geometry_match" , True ) == True
681
- ):
682
- msg_1 = "The bbox field does not match the bounds of the geometry. The bbox should be the minimum bounding rectangle of the geometry."
683
- best_practices_dict ["bbox_geometry_mismatch" ] = [msg_1 ]
694
+ bbox_check_result = self .check_bbox_matches_geometry ()
695
+ bbox_mismatch = False
696
+
697
+ if isinstance (bbox_check_result , tuple ):
698
+ bbox_mismatch = not bbox_check_result [0 ]
699
+ else :
700
+ bbox_mismatch = not bbox_check_result
701
+
702
+ if bbox_mismatch and config .get ("check_bbox_geometry_match" , True ) == True :
703
+ if isinstance (bbox_check_result , tuple ):
704
+ # Unpack the result
705
+ _ , calc_bbox , actual_bbox , differences = bbox_check_result
706
+
707
+ # Format the bbox values for display
708
+ calc_bbox_str = ", " .join ([f"{ v :.6f} " for v in calc_bbox ])
709
+ actual_bbox_str = ", " .join ([f"{ v :.6f} " for v in actual_bbox ])
710
+
711
+ # Create a more detailed message about which coordinates differ
712
+ coordinate_labels = [
713
+ "min longitude" ,
714
+ "min latitude" ,
715
+ "max longitude" ,
716
+ "max latitude" ,
717
+ ]
718
+ mismatch_details = []
719
+
720
+ # Use the same epsilon threshold as in check_bbox_matches_geometry
721
+ epsilon = 5e-7
722
+
723
+ for i , (diff , label ) in enumerate (zip (differences , coordinate_labels )):
724
+ if diff > epsilon :
725
+ mismatch_details .append (
726
+ f"{ label } : calculated={ calc_bbox [i ]:.6f} , actual={ actual_bbox [i ]:.6f} , diff={ diff :.7f} "
727
+ )
728
+
729
+ msg_1 = "The bbox field does not match the bounds of the geometry. The bbox should be the minimum bounding rectangle of the geometry."
730
+ msg_2 = f"Calculated bbox from geometry: [{ calc_bbox_str } ]"
731
+ msg_3 = f"Actual bbox in metadata: [{ actual_bbox_str } ]"
732
+
733
+ messages = [msg_1 , msg_2 , msg_3 ]
734
+ if mismatch_details :
735
+ messages .append ("Mismatched coordinates:" )
736
+ messages .extend (mismatch_details )
737
+ else :
738
+ # If we got here but there are no visible differences at 6 decimal places,
739
+ # add a note explaining that the differences are too small to matter
740
+ messages .append (
741
+ "Note: The differences are too small to be visible at 6 decimal places and can be ignored."
742
+ )
743
+
744
+ best_practices_dict ["bbox_geometry_mismatch" ] = messages
745
+ else :
746
+ msg_1 = "The bbox field does not match the bounds of the geometry. The bbox should be the minimum bounding rectangle of the geometry."
747
+ best_practices_dict ["bbox_geometry_mismatch" ] = [msg_1 ]
684
748
685
749
# check to see if there are too many links
686
750
if (
0 commit comments