Skip to content

feature: support different types of deletion mode #3786

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/sagemaker/feature_store/feature_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
FeatureValue,
FeatureParameter,
TableFormatEnum,
DeletionModeEnum,
)
from sagemaker.utils import resolve_value_from_config

Expand Down Expand Up @@ -785,6 +786,7 @@ def delete_record(
self,
record_identifier_value_as_string: str,
event_time: str,
deletion_mode: DeletionModeEnum = DeletionModeEnum.SOFT_DELETE,
):
"""Delete a single record from a FeatureGroup.

Expand All @@ -793,11 +795,15 @@ def delete_record(
a String representing the value of the record identifier.
event_time (String):
a timestamp format String indicating when the deletion event occurred.
deletion_mode (DeletionModeEnum):
deletion mode for deleting record. (default: DeletionModeEnum.SOFT_DELETE)
"""

return self.sagemaker_session.delete_record(
feature_group_name=self.name,
record_identifier_value_as_string=record_identifier_value_as_string,
event_time=event_time,
deletion_mode=deletion_mode.value,
)

def ingest(
Expand Down
10 changes: 10 additions & 0 deletions src/sagemaker/feature_store/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,3 +369,13 @@ def to_dict(self) -> Dict[str, Any]:
RecordIdentifiersValueAsString=self.record_identifiers_value_as_string,
FeatureNames=None if not self.feature_names else self.feature_names,
)


class DeletionModeEnum(Enum):
    """Deletion modes accepted when deleting a record.

    Each member's ``value`` is the string form of the mode, either
    ``"SoftDelete"`` or ``"HardDelete"``.
    """

    # Member order is part of the enum's iteration order; keep it stable.
    SOFT_DELETE = "SoftDelete"
    HARD_DELETE = "HardDelete"
3 changes: 3 additions & 0 deletions src/sagemaker/session.py
Original file line number Diff line number Diff line change
def delete_record(
    self,
    feature_group_name: str,
    record_identifier_value_as_string: str,
    event_time: str,
    deletion_mode: str = None,
):
    """Deletes a single record from the FeatureGroup.

    Args:
        feature_group_name (str): name of the FeatureGroup.
        record_identifier_value_as_string (str): string representing the value of
            the record identifier.
        event_time (str): a timestamp indicating when the deletion event occurred.
        deletion_mode (str): deletion mode for deleting record. When ``None``
            (the default) the ``DeletionMode`` parameter is left out of the
            request instead of being sent as ``None``.

    Returns:
        Whatever the Feature Store runtime client's ``delete_record`` call returns.
    """
    request = {
        "FeatureGroupName": feature_group_name,
        "RecordIdentifierValueAsString": record_identifier_value_as_string,
        "EventTime": event_time,
    }
    # botocore validates parameter types client-side and rejects ``None`` for a
    # string member, so only include DeletionMode when the caller supplied one.
    if deletion_mode is not None:
        request["DeletionMode"] = deletion_mode
    return self.sagemaker_featurestore_runtime_client.delete_record(**request)

def get_record(
Expand Down
61 changes: 60 additions & 1 deletion tests/integ/test_feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
Filter,
ResourceEnum,
Identifier,
DeletionModeEnum,
)
from sagemaker.feature_store.dataset_builder import (
JoinTypeEnum,
Expand Down Expand Up @@ -162,6 +163,15 @@ def pandas_data_frame_without_string():
return df


@pytest.fixture
def historic_record():
    """Record whose ``feature3`` timestamp is 2020-10-29T03:43:21Z."""
    values_by_feature = {
        "feature1": "10.0",
        "feature2": "7",
        "feature3": "2020-10-29T03:43:21Z",
    }
    # Dict insertion order keeps the features in feature1, feature2, feature3 order.
    return [
        FeatureValue(feature_name=name, value_as_string=text)
        for name, text in values_by_feature.items()
    ]


@pytest.fixture
def record():
return [
Expand Down Expand Up @@ -398,7 +408,7 @@ def test_get_and_batch_get_record(
assert feature["FeatureName"] is not removed_feature_name


def test_delete_record(
def test_soft_delete_record(
feature_store_session,
role,
feature_group_name,
Expand Down Expand Up @@ -437,6 +447,55 @@ def test_delete_record(
assert retrieved_record is None


def test_hard_delete_record(
    feature_store_session,
    role,
    feature_group_name,
    pandas_data_frame,
    historic_record,
    record,
):
    """Hard-delete a record, then ingest and read back ``historic_record``.

    Flow: create an online-store feature group, put ``record`` and confirm it
    is retrievable, delete it with ``DeletionModeEnum.HARD_DELETE`` and confirm
    it is gone, then put ``historic_record`` (event time 2020-10-29, i.e.
    earlier than the deletion event time) and confirm it is retrievable.
    """
    feature_group = FeatureGroup(name=feature_group_name, sagemaker_session=feature_store_session)
    feature_group.load_feature_definitions(data_frame=pandas_data_frame)

    # "feature1" is the record identifier, and it is the first FeatureValue in
    # the historic_record fixture; `record` is assumed to follow the same layout.
    record_identifier_value_as_string = record[0].value_as_string
    historic_record_identifier_value_as_string = historic_record[0].value_as_string
    with cleanup_feature_group(feature_group):
        feature_group.create(
            s3_uri=False,
            record_identifier_name="feature1",
            event_time_feature_name="feature3",
            role_arn=role,
            enable_online_store=True,
        )
        _wait_for_feature_group_create(feature_group)
        # Ingest data
        feature_group.put_record(record=record)
        # Retrieve data
        retrieved_record = feature_group.get_record(
            record_identifier_value_as_string=record_identifier_value_as_string,
        )
        assert retrieved_record is not None
        # Delete data. The trailing "Z" marks the timestamp as UTC, so it must
        # come from a UTC clock rather than the machine's local time.
        deletion_event_time = datetime.datetime.now(datetime.timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%SZ"
        )
        feature_group.delete_record(
            record_identifier_value_as_string=record_identifier_value_as_string,
            event_time=deletion_event_time,
            deletion_mode=DeletionModeEnum.HARD_DELETE,
        )
        # Retrieve data
        retrieved_record = feature_group.get_record(
            record_identifier_value_as_string=record_identifier_value_as_string,
        )
        assert retrieved_record is None
        # Ingest data
        feature_group.put_record(record=historic_record)
        # Retrieve data
        retrieved_record = feature_group.get_record(
            record_identifier_value_as_string=historic_record_identifier_value_as_string,
        )
        assert retrieved_record is not None


def test_update_feature_group(
feature_store_session,
role,
Expand Down
37 changes: 36 additions & 1 deletion tests/unit/sagemaker/feature_store/test_feature_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
AthenaQuery,
IngestionError,
)
from sagemaker.feature_store.inputs import FeatureParameter
from sagemaker.feature_store.inputs import FeatureParameter, DeletionModeEnum

from tests.unit import SAGEMAKER_CONFIG_FEATURE_GROUP

Expand Down Expand Up @@ -296,6 +296,41 @@ def test_delete_record(sagemaker_session_mock):
feature_group_name="MyFeatureGroup",
record_identifier_value_as_string=record_identifier_value_as_string,
event_time=event_time,
deletion_mode=DeletionModeEnum.SOFT_DELETE.value,
)


def test_soft_delete_record(sagemaker_session_mock):
    """An explicit SOFT_DELETE mode is forwarded to the session as its string value."""
    record_id = "1.0"
    deleted_at = "2022-09-14"
    group = FeatureGroup(name="MyFeatureGroup", sagemaker_session=sagemaker_session_mock)

    group.delete_record(
        record_identifier_value_as_string=record_id,
        event_time=deleted_at,
        deletion_mode=DeletionModeEnum.SOFT_DELETE,
    )

    # The session receives the enum's value, not the enum member itself.
    expected_call = {
        "feature_group_name": "MyFeatureGroup",
        "record_identifier_value_as_string": record_id,
        "event_time": deleted_at,
        "deletion_mode": DeletionModeEnum.SOFT_DELETE.value,
    }
    sagemaker_session_mock.delete_record.assert_called_with(**expected_call)


def test_hard_delete_record(sagemaker_session_mock):
    """A HARD_DELETE mode is forwarded to the session as its string value."""
    mode = DeletionModeEnum.HARD_DELETE
    delete_args = {
        "record_identifier_value_as_string": "1.0",
        "event_time": "2022-09-14",
    }
    group = FeatureGroup(name="MyFeatureGroup", sagemaker_session=sagemaker_session_mock)

    group.delete_record(deletion_mode=mode, **delete_args)

    # Same identifier and event time go through unchanged; the mode is
    # passed on as the enum's value.
    sagemaker_session_mock.delete_record.assert_called_with(
        feature_group_name="MyFeatureGroup",
        deletion_mode=mode.value,
        **delete_args,
    )


Expand Down