Skip to content

Commit 8348c7d

Browse files
AWS-pratab authored and mufaddal-rohawala committed
feature: support table format option for create feature group.
Co-authored-by: Mufaddal Rohawala <[email protected]>
1 parent 7391fa1 commit 8348c7d

File tree

5 files changed

+171
-9
lines changed

5 files changed

+171
-9
lines changed

src/sagemaker/feature_store/feature_group.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
DataCatalogConfig,
5555
FeatureValue,
5656
FeatureParameter,
57+
TableFormatEnum,
5758
)
5859

5960
logger = logging.getLogger(__name__)
@@ -461,6 +462,7 @@ def create(
461462
data_catalog_config: DataCatalogConfig = None,
462463
description: str = None,
463464
tags: List[Dict[str, str]] = None,
465+
table_format: TableFormatEnum = None,
464466
) -> Dict[str, Any]:
465467
"""Create a SageMaker FeatureStore FeatureGroup.
466468
@@ -470,20 +472,23 @@ def create(
470472
record_identifier_name (str): name of the record identifier feature.
471473
event_time_feature_name (str): name of the event time feature.
472474
role_arn (str): ARN of the role used to call CreateFeatureGroup.
473-
online_store_kms_key_id (str): KMS key id for online store.
474-
enable_online_store (bool): whether to enable online store or not.
475-
offline_store_kms_key_id (str): KMS key id for offline store.
475+
online_store_kms_key_id (str): KMS key id for online store (default: None).
476+
enable_online_store (bool): whether to enable online store or not (default: False).
477+
offline_store_kms_key_id (str): KMS key id for offline store (default: None).
476478
If a KMS encryption key is not specified, SageMaker encrypts all data at
477479
rest using the default AWS KMS key. By defining your bucket-level key for
478480
SSE, you can reduce the cost of AWS KMS requests.
479481
For more information, see
480482
`Bucket Key
481483
<https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucket-key.html>`_
482484
in the Amazon S3 User Guide.
483-
disable_glue_table_creation (bool): whether to turn off Glue table creation no not.
484-
data_catalog_config (DataCatalogConfig): configuration for Metadata store.
485-
description (str): description of the FeatureGroup.
486-
tags (List[Dict[str, str]]): list of tags for labeling a FeatureGroup.
485+
disable_glue_table_creation (bool): whether to turn off Glue table creation
486+
or not (default: False).
487+
data_catalog_config (DataCatalogConfig): configuration for
488+
Metadata store (default: None).
489+
description (str): description of the FeatureGroup (default: None).
490+
tags (List[Dict[str, str]]): list of tags for labeling a FeatureGroup (default: None).
491+
table_format (TableFormatEnum): format of the offline store table (default: None).
487492
488493
Returns:
489494
Response dict from service.
@@ -518,6 +523,7 @@ def create(
518523
s3_storage_config=s3_storage_config,
519524
disable_glue_table_creation=disable_glue_table_creation,
520525
data_catalog_config=data_catalog_config,
526+
table_format=table_format,
521527
)
522528
create_feature_store_args.update(
523529
{"offline_store_config": offline_store_config.to_dict()}

src/sagemaker/feature_store/inputs.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
import abc
3232
from typing import Dict, Any
33+
from enum import Enum
3334

3435
import attr
3536

@@ -158,6 +159,16 @@ def to_dict(self) -> Dict[str, Any]:
158159
)
159160

160161

162+
class TableFormatEnum(Enum):
    """Table formats that can be requested for an offline store.

    Each member's ``value`` is the string written under the ``TableFormat``
    key when an ``OfflineStoreConfig`` is converted to a dictionary.
    """

    # Member order is part of the enum's iteration order; keep GLUE first.
    GLUE = "Glue"
    ICEBERG = "Iceberg"
170+
171+
161172
@attr.s
162173
class OfflineStoreConfig(Config):
163174
"""OfflineStoreConfig for FeatureStore.
@@ -166,11 +177,13 @@ class OfflineStoreConfig(Config):
166177
s3_storage_config (S3StorageConfig): configuration of S3 storage.
167178
disable_glue_table_creation (bool): whether to disable the Glue table creation.
168179
data_catalog_config (DataCatalogConfig): configuration of the data catalog.
180+
table_format (TableFormatEnum): format of the offline store table.
169181
"""
170182

171183
s3_storage_config: S3StorageConfig = attr.ib()
172184
disable_glue_table_creation: bool = attr.ib(default=False)
173185
data_catalog_config: DataCatalogConfig = attr.ib(default=None)
186+
table_format: TableFormatEnum = attr.ib(default=None)
174187

175188
def to_dict(self) -> Dict[str, Any]:
176189
"""Construct a dictionary based on the attributes.
@@ -182,6 +195,7 @@ def to_dict(self) -> Dict[str, Any]:
182195
DisableGlueTableCreation=self.disable_glue_table_creation,
183196
S3StorageConfig=self.s3_storage_config,
184197
DataCatalogConfig=self.data_catalog_config,
198+
TableFormat=self.table_format.value if self.table_format else None,
185199
)
186200

187201

tests/integ/test_feature_store.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
from sagemaker.feature_store.feature_definition import FractionalFeatureDefinition
2626
from sagemaker.feature_store.feature_group import FeatureGroup
27-
from sagemaker.feature_store.inputs import FeatureValue, FeatureParameter
27+
from sagemaker.feature_store.inputs import FeatureValue, FeatureParameter, TableFormatEnum
2828
from sagemaker.session import get_execution_role, Session
2929
from tests.integ.timeout import timeout
3030

@@ -238,6 +238,56 @@ def test_create_feature_store(
238238
assert output["FeatureGroupArn"].endswith(f"feature-group/{feature_group_name}")
239239

240240

241+
def test_create_feature_group_iceberg_table_format(
    feature_store_session,
    role,
    feature_group_name,
    offline_store_s3_uri,
    pandas_data_frame,
):
    """Create a feature group with an Iceberg offline table and verify the format.

    The group is created with ``table_format=TableFormatEnum.ICEBERG`` and the
    ``TableFormat`` reported by ``describe()`` is expected to be ``"Iceberg"``.
    """
    fg = FeatureGroup(name=feature_group_name, sagemaker_session=feature_store_session)
    fg.load_feature_definitions(data_frame=pandas_data_frame)

    # NOTE(review): the describe() check is kept inside the cleanup context on the
    # assumption that the context manager removes the group on exit - confirm
    # against cleanup_feature_group.
    with cleanup_feature_group(fg):
        fg.create(
            s3_uri=offline_store_s3_uri,
            record_identifier_name="feature1",
            event_time_feature_name="feature3",
            role_arn=role,
            enable_online_store=True,
            table_format=TableFormatEnum.ICEBERG,
        )
        _wait_for_feature_group_create(fg)

        description = fg.describe()
        offline_store_config = description.get("OfflineStoreConfig")
        table_format = offline_store_config.get("TableFormat")
        assert table_format == "Iceberg"
264+
265+
266+
def test_create_feature_group_glue_table_format(
    feature_store_session,
    role,
    feature_group_name,
    offline_store_s3_uri,
    pandas_data_frame,
):
    """Create a feature group with a Glue offline table and verify the format.

    The group is created with ``table_format=TableFormatEnum.GLUE`` and the
    ``TableFormat`` reported by ``describe()`` is expected to be ``"Glue"``.
    """
    fg = FeatureGroup(name=feature_group_name, sagemaker_session=feature_store_session)
    fg.load_feature_definitions(data_frame=pandas_data_frame)

    # NOTE(review): the describe() check is kept inside the cleanup context on the
    # assumption that the context manager removes the group on exit - confirm
    # against cleanup_feature_group.
    with cleanup_feature_group(fg):
        fg.create(
            s3_uri=offline_store_s3_uri,
            record_identifier_name="feature1",
            event_time_feature_name="feature3",
            role_arn=role,
            enable_online_store=True,
            table_format=TableFormatEnum.GLUE,
        )
        _wait_for_feature_group_create(fg)

        description = fg.describe()
        offline_store_config = description.get("OfflineStoreConfig")
        table_format = offline_store_config.get("TableFormat")
        assert table_format == "Glue"
289+
290+
241291
def test_update_feature_group(
242292
feature_store_session,
243293
role,

tests/unit/sagemaker/feature_store/test_feature_store.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@
3131
AthenaQuery,
3232
IngestionError,
3333
)
34-
from sagemaker.feature_store.inputs import FeatureParameter
34+
from sagemaker.feature_store.inputs import (
35+
FeatureParameter,
36+
TableFormatEnum,
37+
)
3538

3639

3740
class PicklableMock(Mock):
@@ -115,6 +118,68 @@ def test_feature_store_create(
115118
)
116119

117120

121+
def test_feature_store_create_iceberg_table_format(
    sagemaker_session_mock, role_arn, feature_group_dummy_definitions, s3_uri
):
    """``FeatureGroup.create`` passes an Iceberg table format through to the session.

    The mocked session must receive an ``offline_store_config`` whose
    ``TableFormat`` entry is the string ``"Iceberg"``.
    """
    fg = FeatureGroup(name="MyFeatureGroup", sagemaker_session=sagemaker_session_mock)
    fg.feature_definitions = feature_group_dummy_definitions

    fg.create(
        s3_uri=s3_uri,
        record_identifier_name="feature1",
        event_time_feature_name="feature2",
        role_arn=role_arn,
        enable_online_store=True,
        disable_glue_table_creation=False,
        table_format=TableFormatEnum.ICEBERG,
    )

    # Expected payload pieces, spelled out separately for readability.
    expected_definitions = [fd.to_dict() for fd in feature_group_dummy_definitions]
    expected_offline_store_config = {
        "DisableGlueTableCreation": False,
        "TableFormat": "Iceberg",
        "S3StorageConfig": {"S3Uri": s3_uri},
    }

    sagemaker_session_mock.create_feature_group.assert_called_with(
        feature_group_name="MyFeatureGroup",
        record_identifier_name="feature1",
        event_time_feature_name="feature2",
        feature_definitions=expected_definitions,
        role_arn=role_arn,
        description=None,
        tags=None,
        online_store_config={"EnableOnlineStore": True},
        offline_store_config=expected_offline_store_config,
    )
150+
151+
152+
def test_feature_store_create_glue_table_format(
    sagemaker_session_mock, role_arn, feature_group_dummy_definitions, s3_uri
):
    """``FeatureGroup.create`` passes a Glue table format through to the session.

    The mocked session must receive an ``offline_store_config`` whose
    ``TableFormat`` entry is the string ``"Glue"``.
    """
    fg = FeatureGroup(name="MyFeatureGroup", sagemaker_session=sagemaker_session_mock)
    fg.feature_definitions = feature_group_dummy_definitions

    fg.create(
        s3_uri=s3_uri,
        record_identifier_name="feature1",
        event_time_feature_name="feature2",
        role_arn=role_arn,
        enable_online_store=True,
        disable_glue_table_creation=False,
        table_format=TableFormatEnum.GLUE,
    )

    # Expected payload pieces, spelled out separately for readability.
    expected_definitions = [fd.to_dict() for fd in feature_group_dummy_definitions]
    expected_offline_store_config = {
        "DisableGlueTableCreation": False,
        "TableFormat": "Glue",
        "S3StorageConfig": {"S3Uri": s3_uri},
    }

    sagemaker_session_mock.create_feature_group.assert_called_with(
        feature_group_name="MyFeatureGroup",
        record_identifier_name="feature1",
        event_time_feature_name="feature2",
        feature_definitions=expected_definitions,
        role_arn=role_arn,
        description=None,
        tags=None,
        online_store_config={"EnableOnlineStore": True},
        offline_store_config=expected_offline_store_config,
    )
181+
182+
118183
def test_feature_store_create_online_only(
119184
sagemaker_session_mock, role_arn, feature_group_dummy_definitions
120185
):

tests/unit/sagemaker/feature_store/test_inputs.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
DataCatalogConfig,
2121
OfflineStoreConfig,
2222
FeatureParameter,
23+
TableFormatEnum,
2324
)
2425

2526

@@ -86,6 +87,32 @@ def test_offline_data_store_config():
8687
)
8788

8889

90+
def test_offline_data_store_config_with_glue_table_format():
    """``OfflineStoreConfig.to_dict`` reports ``TableFormat`` as ``"Glue"`` for GLUE."""
    storage = S3StorageConfig(s3_uri="uri")
    config = OfflineStoreConfig(s3_storage_config=storage, table_format=TableFormatEnum.GLUE)

    expected = {
        "S3StorageConfig": {"S3Uri": "uri"},
        "DisableGlueTableCreation": False,
        "TableFormat": "Glue",
    }
    actual = config.to_dict()

    assert ordered(actual) == ordered(expected)
101+
102+
103+
def test_offline_data_store_config_with_iceberg_table_format():
    """``OfflineStoreConfig.to_dict`` reports ``TableFormat`` as ``"Iceberg"`` for ICEBERG."""
    storage = S3StorageConfig(s3_uri="uri")
    config = OfflineStoreConfig(s3_storage_config=storage, table_format=TableFormatEnum.ICEBERG)

    expected = {
        "S3StorageConfig": {"S3Uri": "uri"},
        "DisableGlueTableCreation": False,
        "TableFormat": "Iceberg",
    }
    actual = config.to_dict()

    assert ordered(actual) == ordered(expected)
114+
115+
89116
def test_feature_metadata():
90117
config = FeatureParameter(key="key", value="value")
91118
assert ordered(config.to_dict()) == ordered({"Key": "key", "Value": "value"})

0 commit comments

Comments
 (0)