-
Notifications
You must be signed in to change notification settings - Fork 30
Adjust ItemProperties Validation. #131
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e4a284f
ed35e75
b026dc7
dd54306
af87bcf
28461bf
bd913a8
64360f4
202e33b
2aa30eb
f4426b4
72d1ace
3e6648d
8b7881b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,12 +20,7 @@ keywords=["stac", "pydantic", "validation"] | |
authors=[{ name = "Arturo Engineering", email = "[email protected]"}] | ||
license= { text = "MIT" } | ||
requires-python=">=3.8" | ||
dependencies = [ | ||
"click>=8.1.7", | ||
"pydantic>=2.4.1", | ||
"geojson-pydantic>=1.0.0", | ||
"ciso8601~=2.3", | ||
] | ||
dependencies = ["click>=8.1.7", "pydantic>=2.4.1", "geojson-pydantic>=1.0.0"] | ||
dynamic = ["version", "readme"] | ||
|
||
[project.scripts] | ||
|
@@ -37,7 +32,6 @@ repository ="https://github.com/stac-utils/stac-pydantic.git" | |
|
||
[project.optional-dependencies] | ||
dev = [ | ||
"arrow>=1.2.3", | ||
"pytest>=7.4.2", | ||
"pytest-cov>=4.1.0", | ||
"pytest-icdiff>=0.8", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,24 +1,15 @@ | ||
from datetime import datetime as dt | ||
from typing import Any, Dict, List, Optional, Union | ||
from typing import Any, Dict, List, Optional | ||
|
||
from ciso8601 import parse_rfc3339 | ||
from geojson_pydantic import Feature | ||
from pydantic import ( | ||
AnyUrl, | ||
ConfigDict, | ||
Field, | ||
field_serializer, | ||
model_serializer, | ||
model_validator, | ||
) | ||
from pydantic import AnyUrl, ConfigDict, Field, model_serializer, model_validator | ||
|
||
from stac_pydantic.links import Links | ||
from stac_pydantic.shared import ( | ||
DATETIME_RFC339, | ||
SEMVER_REGEX, | ||
Asset, | ||
StacBaseModel, | ||
StacCommonMetadata, | ||
UtcDatetime, | ||
) | ||
from stac_pydantic.version import STAC_VERSION | ||
|
||
|
@@ -28,39 +19,12 @@ class ItemProperties(StacCommonMetadata): | |
https://github.com/radiantearth/stac-spec/blob/v1.0.0/item-spec/item-spec.md#properties-object | ||
""" | ||
|
||
datetime: Union[dt, str] = Field(..., alias="datetime") | ||
# Overide the datetime field to be required | ||
datetime: Optional[UtcDatetime] | ||
|
||
eseglem marked this conversation as resolved.
Show resolved
Hide resolved
vincentsarago marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# Check https://docs.pydantic.dev/dev-v2/migration/#changes-to-config for more information. | ||
model_config = ConfigDict(extra="allow") | ||
|
||
@model_validator(mode="before") | ||
@classmethod | ||
def validate_datetime(cls, data: Dict[str, Any]) -> Dict[str, Any]: | ||
datetime = data.get("datetime") | ||
start_datetime = data.get("start_datetime") | ||
end_datetime = data.get("end_datetime") | ||
|
||
if not datetime or datetime == "null": | ||
if not start_datetime and not end_datetime: | ||
raise ValueError( | ||
"start_datetime and end_datetime must be specified when datetime is null" | ||
) | ||
|
||
if isinstance(datetime, str): | ||
data["datetime"] = parse_rfc3339(datetime) | ||
|
||
if isinstance(start_datetime, str): | ||
data["start_datetime"] = parse_rfc3339(start_datetime) | ||
|
||
if isinstance(end_datetime, str): | ||
data["end_datetime"] = parse_rfc3339(end_datetime) | ||
|
||
return data | ||
|
||
@field_serializer("datetime") | ||
def serialize_datetime(self, v: dt, _info: Any) -> str: | ||
return v.strftime(DATETIME_RFC339) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we still need the serializer to make sure we respect the RFC3339 spec This could be one implementation (to make sure we keep microseconds, if provided) @field_serializer("datetime", "start_datetime", "end_datetime", when_used="always")
def serialize_datetime(self, v: Optional[dt], _info: Any) -> str:
if v:
if str(v)[19] == ".":
return v.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
else:
return v.strftime("%Y-%m-%dT%H:%M:%SZ") There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Assuming the datetime object has all the required values, then the pydantic output will always be valid RFC3339. But that is not always the case. Pydantic allows in time without seconds, but then the output would not be valid. If input is validated through If, however, we want to accept a wider range of inputs (
And the use of from ciso8601 import parse_rfc3339
from datetime import datetime, timezone
from pydantic import TypeAdapter
d = parse_rfc3339("2011-11-04T00:05:23.283+04:00")
d.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
# '2011-11-04T00:05:23.283000Z' <- Not actually the correct timezone
d.isoformat()
# '2011-11-04T00:05:23.283000+04:00'
d.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
# '2011-11-03T20:05:23.283000Z'
d.astimezone(timezone.utc).isoformat()
# '2011-11-03T20:05:23.283000+00:00'
TypeAdapter(datetime).dump_json(d)
# b'"2011-11-04T00:05:23.283000+04:00"'
TypeAdapter(datetime).dump_json(d.astimezone(timezone.utc))
# b'"2011-11-03T20:05:23.283000Z"' All of those are valid RFC3339 outputs, but one of them is lying about the timezone. And now looking back at https://github.com/radiantearth/stac-spec/blob/v1.0.0/item-spec/item-spec.md#properties-object seems like there is actually a bit more of an issue. The spec requires UTC on top of the RFC3339. So even with I see a few possibilities: def parse_rfc3339_asutc(value: Any) -> datetime:
return parse_rfc3339(value).astimezone(timezone.utc)
def parse_utc_rfc3339(value: Any) -> datetime:
d = parse_rfc3339(value)
if not d.tzinfo == timezone.utc:
raise ValueError("Input must be UTC")
return d
# Flexible input as long as there is a timezone. Converts to UTC. Use isoformat to get valid RFC format.
Annotated[AwareDatetime, AfterValidator(lambda d: d.astimezone(timezone.utc)), PlainSerializer(lambda d: d.isoformat())]
# Input must be RFC, any timzone. Converts to UTC. Pydantic serializer outputs valid RFC format.
Annotated[datetime, BeforeValidator(parse_rfc3339_asutc)]
# Input must be RFC and UTC. Pydantic serializer outputs valid RFC format.
Annotated[datetime, BeforeValidator(parse_utc_rfc3339)] I would probably still go with Pydantic for parsing. Gives the most flexibility on input, and can still guarantee valid output. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree with @gadomski comments below in regards to "be permissive for inputs, strict on outputs." This has the advantage that any date format can be accepted including I added a possible solution in here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @thomas-maschler if going the The way it is current written in this PR is to accept a datetime with a timezone, and convert it to UTC. Rather than accept a naive value and assume UTC. Though the spec here does say it must be in UTC. So, its not a horrible thing to assume either. Tons of possible solutions. I was trying to optimize on keeping as much as possible inside the pydantic rust code and minimal python code to maintain here. Its funny how a spec trying to simplify things or add clarity can make things more complicated than necessary. |
||
|
||
class Item(Feature, StacBaseModel): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,17 @@ | ||
from datetime import datetime | ||
from datetime import timezone | ||
from enum import Enum, auto | ||
from typing import Any, Dict, List, Optional, Tuple, Union | ||
from warnings import warn | ||
|
||
from pydantic import BaseModel, ConfigDict, Field | ||
from pydantic import ( | ||
AfterValidator, | ||
AwareDatetime, | ||
BaseModel, | ||
ConfigDict, | ||
Field, | ||
model_validator, | ||
) | ||
from typing_extensions import Annotated, Self | ||
|
||
from stac_pydantic.utils import AutoValueEnum | ||
|
||
|
@@ -15,9 +23,14 @@ | |
|
||
SEMVER_REGEX = r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$" | ||
|
||
# https://tools.ietf.org/html/rfc3339#section-5.6 | ||
# Unused, but leaving it here since it's used by dependencies | ||
DATETIME_RFC339 = "%Y-%m-%dT%H:%M:%SZ" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed from the tests. And, if there are downstream users which use this outside of that, they should do their own formatting, as there are issues with this format. Its missing |
||
# Allows for some additional flexibility in the input datetime format. As long as | ||
# the input value has timezone information, it will be converted to UTC timezone. | ||
UtcDatetime = Annotated[ | ||
# Input value must be in a format which has timezone information | ||
AwareDatetime, | ||
# Convert the input value to UTC timezone | ||
AfterValidator(lambda d: d.astimezone(timezone.utc)), | ||
] | ||
|
||
|
||
class MimeTypes(str, Enum): | ||
|
@@ -106,41 +119,76 @@ class Provider(StacBaseModel): | |
https://github.com/radiantearth/stac-spec/blob/v1.0.0/collection-spec/collection-spec.md#provider-object | ||
""" | ||
|
||
name: str = Field(..., alias="name", min_length=1) | ||
name: str = Field(..., min_length=1) | ||
description: Optional[str] = None | ||
roles: Optional[List[str]] = None | ||
url: Optional[str] = None | ||
|
||
|
||
class StacCommonMetadata(StacBaseModel): | ||
""" | ||
https://github.com/radiantearth/stac-spec/blob/v1.0.0/item-spec/common-metadata.md#date-and-time-range | ||
https://github.com/radiantearth/stac-spec/blob/v1.0.0/item-spec/common-metadata.md | ||
""" | ||
|
||
title: Optional[str] = Field(None, alias="title") | ||
description: Optional[str] = Field(None, alias="description") | ||
start_datetime: Optional[datetime] = Field(None, alias="start_datetime") | ||
end_datetime: Optional[datetime] = Field(None, alias="end_datetime") | ||
created: Optional[datetime] = Field(None, alias="created") | ||
updated: Optional[datetime] = Field(None, alias="updated") | ||
platform: Optional[str] = Field(None, alias="platform") | ||
instruments: Optional[List[str]] = Field(None, alias="instruments") | ||
constellation: Optional[str] = Field(None, alias="constellation") | ||
mission: Optional[str] = Field(None, alias="mission") | ||
providers: Optional[List[Provider]] = Field(None, alias="providers") | ||
gsd: Optional[float] = Field(None, alias="gsd", gt=0) | ||
# Basic | ||
title: Optional[str] = None | ||
description: Optional[str] = None | ||
# Date and Time | ||
datetime: Optional[UtcDatetime] = None | ||
created: Optional[UtcDatetime] = None | ||
updated: Optional[UtcDatetime] = None | ||
# Date and Time Range | ||
start_datetime: Optional[UtcDatetime] = None | ||
end_datetime: Optional[UtcDatetime] = None | ||
# Provider | ||
providers: Optional[List[Provider]] = None | ||
# Instrument | ||
platform: Optional[str] = None | ||
instruments: Optional[List[str]] = None | ||
constellation: Optional[str] = None | ||
mission: Optional[str] = None | ||
gsd: Optional[float] = Field(None, gt=0) | ||
|
||
@model_validator(mode="after") | ||
def validate_datetime_or_start_end(self) -> Self: | ||
# When datetime is null, start_datetime and end_datetime must be specified | ||
if not self.datetime and (not self.start_datetime or not self.end_datetime): | ||
raise ValueError( | ||
"start_datetime and end_datetime must be specified when datetime is null" | ||
) | ||
|
||
return self | ||
|
||
@model_validator(mode="after") | ||
def validate_start_end(self) -> Self: | ||
# Using one of start_datetime or end_datetime requires the use of the other | ||
if (self.start_datetime and not self.end_datetime) or ( | ||
not self.start_datetime and self.end_datetime | ||
): | ||
raise ValueError( | ||
"use of start_datetime or end_datetime requires the use of the other" | ||
) | ||
return self | ||
|
||
|
||
class Asset(StacCommonMetadata): | ||
""" | ||
https://github.com/radiantearth/stac-spec/blob/v1.0.0/item-spec/item-spec.md#asset-object | ||
""" | ||
|
||
href: str = Field(..., alias="href", min_length=1) | ||
href: str = Field(..., min_length=1) | ||
type: Optional[str] = None | ||
title: Optional[str] = None | ||
description: Optional[str] = None | ||
roles: Optional[List[str]] = None | ||
|
||
model_config = ConfigDict( | ||
populate_by_name=True, use_enum_values=True, extra="allow" | ||
) | ||
|
||
@model_validator(mode="after") | ||
def validate_datetime_or_start_end(self) -> Self: | ||
# Overriding the parent method to avoid requiring datetime or start/end_datetime | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. well IMO the Asset model should not herit from the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we can do this in another PR |
||
# Additional fields MAY be added on the Asset object, but are not required. | ||
# https://github.com/radiantearth/stac-spec/blob/v1.0.0/item-spec/item-spec.md#additional-fields-for-assets | ||
return self |
Uh oh!
There was an error while loading. Please reload this page.