Skip to content

feature: Add support for in-memory feature groups and collection type features in Feature Store. #4162

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions doc/api/prep_data/feature_store.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,25 @@ Feature Definition
:members:
:show-inheritance:

.. autoclass:: sagemaker.feature_store.feature_definition.CollectionTypeEnum
    :members:
    :show-inheritance:

.. autoclass:: sagemaker.feature_store.feature_definition.CollectionType
    :members:
    :show-inheritance:

.. autoclass:: sagemaker.feature_store.feature_definition.ListCollectionType
    :members:
    :show-inheritance:

.. autoclass:: sagemaker.feature_store.feature_definition.SetCollectionType
    :members:
    :show-inheritance:

.. autoclass:: sagemaker.feature_store.feature_definition.VectorCollectionType
    :members:
    :show-inheritance:

Inputs
******
Expand Down Expand Up @@ -77,6 +96,10 @@ Inputs
:members:
:show-inheritance:

.. autoclass:: sagemaker.feature_store.inputs.OnlineStoreStorageTypeEnum
    :members:
    :show-inheritance:

.. autoclass:: sagemaker.feature_store.inputs.ResourceEnum
    :members:
    :show-inheritance:
Expand Down
114 changes: 107 additions & 7 deletions src/sagemaker/feature_store/feature_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,87 @@ class FeatureTypeEnum(Enum):
STRING = "String"


class CollectionTypeEnum(Enum):
    """Enum of collection types.

    The collection type of a feature can be List, Set or Vector. Each
    member's value is the string form of the collection type name.
    """

    LIST = "List"
    SET = "Set"
    VECTOR = "Vector"


@attr.s
class CollectionType(Config):
    """Collection type and its configuration.

    This instantiates a CollectionType object, where CollectionType is a
    subclass of Config.

    Attributes:
        collection_type (CollectionTypeEnum): The type of the collection.
        collection_config (Dict[str, Any]): The configuration for the collection.
            May be None when the collection type carries no configuration.
    """

    collection_type: CollectionTypeEnum = attr.ib()
    collection_config: Dict[str, Any] = attr.ib()

    def to_dict(self) -> Dict[str, Any]:
        """Construct a dictionary based on each attribute.

        Returns:
            The result of ``Config.construct_dict`` called with
            ``CollectionType`` set to the enum member's string value and
            ``CollectionConfig`` set to ``collection_config``.
        """
        return Config.construct_dict(
            CollectionType=self.collection_type.value,
            CollectionConfig=self.collection_config,
        )


class ListCollectionType(CollectionType):
    """List collection type.

    This class instantiates a ListCollectionType object, as a subclass of
    CollectionType where the collection type is defined as List.
    """

    def __init__(self):
        """Construct an instance of ListCollectionType."""
        # A List collection is created with no collection configuration.
        super(ListCollectionType, self).__init__(CollectionTypeEnum.LIST, None)


class SetCollectionType(CollectionType):
    """Set collection type.

    This class instantiates a SetCollectionType object, as a subclass of
    CollectionType where the collection type is defined as Set.
    """

    def __init__(self):
        """Construct an instance of SetCollectionType."""
        # A Set collection is created with no collection configuration.
        super(SetCollectionType, self).__init__(CollectionTypeEnum.SET, None)


class VectorCollectionType(CollectionType):
    """Vector collection type.

    This class instantiates a VectorCollectionType object, as a subclass of
    CollectionType where the collection type is defined as Vector.

    The dimension is stored inside ``collection_config`` as
    ``{"VectorConfig": {"Dimension": dimension}}``.
    """

    def __init__(self, dimension: int):
        """Construct an instance of VectorCollectionType.

        Args:
            dimension (int): The dimension size for the Vector.
        """
        # Nest the dimension under "VectorConfig" so it is carried in the
        # collection configuration handed to the base class.
        collection_config: Dict[str, Any] = {"VectorConfig": {"Dimension": dimension}}
        super(VectorCollectionType, self).__init__(CollectionTypeEnum.VECTOR, collection_config)


@attr.s
class FeatureDefinition(Config):
"""Feature definition.
Expand All @@ -48,15 +129,25 @@ class FeatureDefinition(Config):
Attributes:
feature_name (str): The name of the feature
feature_type (FeatureTypeEnum): The type of the feature
collection_type (CollectionType): The type of collection for the feature
"""

feature_name: str = attr.ib()
feature_type: FeatureTypeEnum = attr.ib()
collection_type: CollectionType = attr.ib(default=None)

def to_dict(self) -> Dict[str, Any]:
"""Construct a dictionary based on each attribute."""

return Config.construct_dict(
FeatureName=self.feature_name, FeatureType=self.feature_type.value
FeatureName=self.feature_name,
FeatureType=self.feature_type.value,
CollectionType=(
self.collection_type.collection_type.value if self.collection_type else None
),
CollectionConfig=(
self.collection_type.collection_config if self.collection_type else None
),
)


Expand All @@ -69,15 +160,18 @@ class FractionalFeatureDefinition(FeatureDefinition):
Attributes:
feature_name (str): The name of the feature
feature_type (FeatureTypeEnum): A `FeatureTypeEnum.FRACTIONAL` type
collection_type (CollectionType): The type of collection for the feature
"""

def __init__(self, feature_name: str):
def __init__(self, feature_name: str, collection_type: CollectionType = None):
"""Construct an instance of FractionalFeatureDefinition.

Args:
feature_name (str): the name of the feature.
"""
super(FractionalFeatureDefinition, self).__init__(feature_name, FeatureTypeEnum.FRACTIONAL)
super(FractionalFeatureDefinition, self).__init__(
feature_name, FeatureTypeEnum.FRACTIONAL, collection_type
)


class IntegralFeatureDefinition(FeatureDefinition):
Expand All @@ -89,15 +183,18 @@ class IntegralFeatureDefinition(FeatureDefinition):
Attributes:
feature_name (str): the name of the feature.
feature_type (FeatureTypeEnum): a `FeatureTypeEnum.INTEGRAL` type.
collection_type (CollectionType): The type of collection for the feature.
"""

def __init__(self, feature_name: str):
def __init__(self, feature_name: str, collection_type: CollectionType = None):
"""Construct an instance of IntegralFeatureDefinition.

Args:
feature_name (str): the name of the feature.
"""
super(IntegralFeatureDefinition, self).__init__(feature_name, FeatureTypeEnum.INTEGRAL)
super(IntegralFeatureDefinition, self).__init__(
feature_name, FeatureTypeEnum.INTEGRAL, collection_type
)


class StringFeatureDefinition(FeatureDefinition):
Expand All @@ -109,12 +206,15 @@ class StringFeatureDefinition(FeatureDefinition):
Attributes:
feature_name (str): the name of the feature.
feature_type (FeatureTypeEnum): a `FeatureTypeEnum.STRING` type.
collection_type (CollectionType): The type of collection for the feature.
"""

def __init__(self, feature_name: str):
def __init__(self, feature_name: str, collection_type: CollectionType = None):
"""Construct an instance of StringFeatureDefinition.

Args:
feature_name (str): the name of the feature.
"""
super(StringFeatureDefinition, self).__init__(feature_name, FeatureTypeEnum.STRING)
super(StringFeatureDefinition, self).__init__(
feature_name, FeatureTypeEnum.STRING, collection_type
)
5 changes: 5 additions & 0 deletions src/sagemaker/feature_store/feature_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
DeletionModeEnum,
TtlDuration,
OnlineStoreConfigUpdate,
OnlineStoreStorageTypeEnum,
)
from sagemaker.utils import resolve_value_from_config

Expand Down Expand Up @@ -539,6 +540,7 @@ def create(
description: str = None,
tags: List[Dict[str, str]] = None,
table_format: TableFormatEnum = None,
online_store_storage_type: OnlineStoreStorageTypeEnum = None,
) -> Dict[str, Any]:
"""Create a SageMaker FeatureStore FeatureGroup.

Expand Down Expand Up @@ -566,6 +568,8 @@ def create(
description (str): description of the FeatureGroup (default: None).
tags (List[Dict[str, str]]): list of tags for labeling a FeatureGroup (default: None).
table_format (TableFormatEnum): format of the offline store table (default: None).
online_store_storage_type (OnlineStoreStorageTypeEnum): storage type for the
online store (default: None).

Returns:
Response dict from service.
Expand Down Expand Up @@ -606,6 +610,7 @@ def create(
online_store_config = OnlineStoreConfig(
enable_online_store=enable_online_store,
ttl_duration=ttl_duration,
storage_type=online_store_storage_type,
)
if online_store_kms_key_id is not None:
online_store_config.online_store_security_config = OnlineStoreSecurityConfig(
Expand Down
16 changes: 16 additions & 0 deletions src/sagemaker/feature_store/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,16 @@ def to_dict(self) -> Dict[str, Any]:
)


class OnlineStoreStorageTypeEnum(Enum):
    """Enum of storage types for online store.

    The online store storage types can be Standard or InMemory. Each
    member's value is the string form of the storage type name.
    """

    STANDARD = "Standard"
    IN_MEMORY = "InMemory"


@attr.s
class OnlineStoreConfig(Config):
"""OnlineStoreConfig for FeatureStore.
Expand All @@ -121,6 +131,7 @@ class OnlineStoreConfig(Config):
enable_online_store: bool = attr.ib(default=True)
online_store_security_config: OnlineStoreSecurityConfig = attr.ib(default=None)
ttl_duration: TtlDuration = attr.ib(default=None)
storage_type: OnlineStoreStorageTypeEnum = attr.ib(default=None)

def to_dict(self) -> Dict[str, Any]:
"""Construct a dictionary based on the attributes.
Expand All @@ -132,6 +143,7 @@ def to_dict(self) -> Dict[str, Any]:
EnableOnlineStore=self.enable_online_store,
SecurityConfig=self.online_store_security_config,
TtlDuration=self.ttl_duration,
StorageType=self.storage_type.value if self.storage_type else None,
)


Expand Down Expand Up @@ -254,10 +266,13 @@ class FeatureValue(Config):
Attributes:
feature_name (str): name of the Feature.
value_as_string (str): value of the Feature in string form.
value_as_string_list (List[str]): value of the Feature in string list
form used for collection type.
"""

feature_name: str = attr.ib(default=None)
value_as_string: str = attr.ib(default=None)
value_as_string_list: List[str] = attr.ib(default=None)

def to_dict(self) -> Dict[str, Any]:
"""Construct a dictionary based on the attributes provided.
Expand All @@ -268,6 +283,7 @@ def to_dict(self) -> Dict[str, Any]:
return Config.construct_dict(
FeatureName=self.feature_name,
ValueAsString=self.value_as_string,
ValueAsStringList=self.value_as_string_list,
)


Expand Down
Loading