Skip to content

Commit 29710bb

Browse files
authored
Revert "feature: feature store with_feature_group functionality changes" (#3657)
Revert back change as requested
1 parent 5dba70c commit 29710bb

File tree

3 files changed

+19
-507
lines changed

3 files changed

+19
-507
lines changed

src/sagemaker/feature_store/dataset_builder.py

Lines changed: 14 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -43,34 +43,6 @@ class TableType(Enum):
4343
DATA_FRAME = "DataFrame"
4444

4545

46-
@attr.s
47-
class JoinTypeEnum(Enum):
48-
"""Enum of Join types.
49-
50-
The Join comparator can be "INNER_JOIN", "LEFT_JOIN", "RIGHT_JOIN", "FULL_JOIN"
51-
"""
52-
53-
INNER_JOIN = "JOIN"
54-
LEFT_JOIN = "LEFT JOIN"
55-
RIGHT_JOIN = "RIGHT JOIN"
56-
FULL_JOIN = "FULL JOIN"
57-
58-
59-
@attr.s
60-
class JoinComparatorEnum(Enum):
61-
"""Enum of Join comparators.
62-
63-
The Join comparator can be "EQUALS", "GREATER_THAN", "LESS_THAN",
64-
"GREATER_THAN_OR_EQUAL_TO", or "LESS_THAN_OR_EQUAL_TO"
65-
"""
66-
67-
EQUALS = "="
68-
GREATER_THAN = ">"
69-
GREATER_THAN_OR_EQUAL_TO = ">="
70-
LESS_THAN = "<"
71-
LESS_THAN_OR_EQUAL_TO = "<="
72-
73-
7446
@attr.s
7547
class FeatureGroupToBeMerged:
7648
"""FeatureGroup metadata which will be used for SQL join.
@@ -83,28 +55,19 @@ class FeatureGroupToBeMerged:
8355
Attributes:
8456
features (List[str]): A list of strings representing feature names of this FeatureGroup.
8557
included_feature_names (List[str]): A list of strings representing features to be
86-
included in the SQL join.
58+
included in the SQL join.
8759
projected_feature_names (List[str]): A list of strings representing features to be
8860
included for final projection in output.
8961
catalog (str): A string representing the catalog.
9062
database (str): A string representing the database.
9163
table_name (str): A string representing the Athena table name of this FeatureGroup.
92-
record_identifier_feature_name (str): A string representing the record identifier feature.
64+
record_identifier_feature_name (str): A string representing the record identifier feature.
9365
event_time_identifier_feature (FeatureDefinition): A FeatureDefinition representing the
9466
event time identifier feature.
9567
target_feature_name_in_base (str): A string representing the feature name in base which will
9668
be used as target join key (default: None).
9769
table_type (TableType): A TableType representing the type of table if it is Feature Group or
9870
pandas DataFrame (default: None).
99-
feature_name_in_target (str): A string representing the feature name in the target feature
100-
group that will be compared to the target feature in the base feature group.
101-
If None is provided, the record identifier feature will be used in the
102-
SQL join. (default: None).
103-
join_comparator (JoinComparatorEnum): A JoinComparatorEnum representing the comparator
104-
used when joining the target feature in the base feature group and the feature
105-
in the target feature group. (default: JoinComparatorEnum.EQUALS).
106-
join_type (JoinTypeEnum): A JoinTypeEnum representing the type of join between
107-
the base and target feature groups. (default: JoinTypeEnum.INNER_JOIN).
10871
"""
10972

11073
features: List[str] = attr.ib()
@@ -117,18 +80,12 @@ class FeatureGroupToBeMerged:
11780
event_time_identifier_feature: FeatureDefinition = attr.ib()
11881
target_feature_name_in_base: str = attr.ib(default=None)
11982
table_type: TableType = attr.ib(default=None)
120-
feature_name_in_target: str = attr.ib(default=None)
121-
join_comparator: JoinComparatorEnum = attr.ib(default=JoinComparatorEnum.EQUALS)
122-
join_type: JoinTypeEnum = attr.ib(default=JoinTypeEnum.INNER_JOIN)
12383

12484

12585
def construct_feature_group_to_be_merged(
126-
target_feature_group: FeatureGroup,
86+
feature_group: FeatureGroup,
12787
included_feature_names: List[str],
12888
target_feature_name_in_base: str = None,
129-
feature_name_in_target: str = None,
130-
join_comparator: JoinComparatorEnum = JoinComparatorEnum.EQUALS,
131-
join_type: JoinTypeEnum = JoinTypeEnum.INNER_JOIN,
13289
) -> FeatureGroupToBeMerged:
13390
"""Construct a FeatureGroupToBeMerged object by provided parameters.
13491
@@ -138,29 +95,18 @@ def construct_feature_group_to_be_merged(
13895
included in the output.
13996
target_feature_name_in_base (str): A string representing the feature name in base which
14097
will be used as target join key (default: None).
141-
feature_name_in_target (str): A string representing the feature name in the target feature
142-
group that will be compared to the target feature in the base feature group.
143-
If None is provided, the record identifier feature will be used in the
144-
SQL join. (default: None).
145-
join_comparator (JoinComparatorEnum): A JoinComparatorEnum representing the comparator
146-
used when joining the target feature in the base feature group and the feature
147-
in the target feature group. (default: JoinComparatorEnum.EQUALS).
148-
join_type (JoinTypeEnum): A JoinTypeEnum representing the type of join between
149-
the base and target feature groups. (default: JoinTypeEnum.INNER_JOIN).
15098
Returns:
15199
A FeatureGroupToBeMerged object.
152100
153101
Raises:
154102
ValueError: Invalid feature name(s) in included_feature_names.
155103
"""
156-
feature_group_metadata = target_feature_group.describe()
104+
feature_group_metadata = feature_group.describe()
157105
data_catalog_config = feature_group_metadata.get("OfflineStoreConfig", {}).get(
158106
"DataCatalogConfig", None
159107
)
160108
if not data_catalog_config:
161-
raise RuntimeError(
162-
f"No metastore is configured with FeatureGroup {target_feature_group.name}."
163-
)
109+
raise RuntimeError(f"No metastore is configured with FeatureGroup {feature_group.name}.")
164110

165111
record_identifier_feature_name = feature_group_metadata.get("RecordIdentifierFeatureName", None)
166112
feature_definitions = feature_group_metadata.get("FeatureDefinitions", [])
@@ -180,15 +126,10 @@ def construct_feature_group_to_be_merged(
180126
catalog = data_catalog_config.get("Catalog", None) if disable_glue else _DEFAULT_CATALOG
181127
features = [feature.get("FeatureName", None) for feature in feature_definitions]
182128

183-
if feature_name_in_target is not None and feature_name_in_target not in features:
184-
raise ValueError(
185-
f"Feature {feature_name_in_target} not found in FeatureGroup {target_feature_group.name}"
186-
)
187-
188129
for included_feature in included_feature_names or []:
189130
if included_feature not in features:
190131
raise ValueError(
191-
f"Feature {included_feature} not found in FeatureGroup {target_feature_group.name}"
132+
f"Feature {included_feature} not found in FeatureGroup {feature_group.name}"
192133
)
193134
if not included_feature_names:
194135
included_feature_names = features
@@ -210,9 +151,6 @@ def construct_feature_group_to_be_merged(
210151
FeatureDefinition(event_time_identifier_feature_name, event_time_identifier_feature_type),
211152
target_feature_name_in_base,
212153
TableType.FEATURE_GROUP,
213-
feature_name_in_target,
214-
join_comparator,
215-
join_type,
216154
)
217155

218156

@@ -289,38 +227,21 @@ def with_feature_group(
289227
feature_group: FeatureGroup,
290228
target_feature_name_in_base: str = None,
291229
included_feature_names: List[str] = None,
292-
feature_name_in_target: str = None,
293-
join_comparator: JoinComparatorEnum = JoinComparatorEnum.EQUALS,
294-
join_type: JoinTypeEnum = JoinTypeEnum.INNER_JOIN,
295230
):
296231
"""Join FeatureGroup with base.
297232
298233
Args:
299-
feature_group (FeatureGroup): A target FeatureGroup which will be joined to base.
234+
feature_group (FeatureGroup): A FeatureGroup which will be joined to base.
300235
target_feature_name_in_base (str): A string representing the feature name in base which
301-
will be used as a join key (default: None).
236+
will be used as target join key (default: None).
302237
included_feature_names (List[str]): A list of strings representing features to be
303238
included in the output (default: None).
304-
feature_name_in_target (str): A string representing the feature name in the target
305-
feature group that will be compared to the target feature in the base feature group.
306-
If None is provided, the record identifier feature will be used in the
307-
SQL join. (default: None).
308-
join_comparator (JoinComparatorEnum): A JoinComparatorEnum representing the comparator
309-
used when joining the target feature in the base feature group and the feature
310-
in the target feature group. (default: JoinComparatorEnum.EQUALS).
311-
join_type (JoinTypeEnum): A JoinTypeEnum representing the type of join between
312-
the base and target feature groups. (default: JoinTypeEnum.INNER_JOIN).
313-
Returns:
314-
This DatasetBuilder object.
239+
Returns:
240+
This DatasetBuilder object.
315241
"""
316242
self._feature_groups_to_be_merged.append(
317243
construct_feature_group_to_be_merged(
318-
feature_group,
319-
included_feature_names,
320-
target_feature_name_in_base,
321-
feature_name_in_target,
322-
join_comparator,
323-
join_type,
244+
feature_group, included_feature_names, target_feature_name_in_base
324245
)
325246
)
326247
return self
@@ -984,18 +905,10 @@ def _construct_join_condition(self, feature_group: FeatureGroupToBeMerged, suffi
984905
Returns:
985906
The JOIN query string.
986907
"""
987-
988-
feature_name_in_target = (
989-
feature_group.feature_name_in_target
990-
if feature_group.feature_name_in_target is not None
991-
else feature_group.record_identifier_feature_name
992-
)
993-
994908
join_condition_string = (
995-
f"\n{feature_group.join_type.value} fg_{suffix}\n"
996-
+ f'ON fg_base."{feature_group.target_feature_name_in_base}"'
997-
+ f" {feature_group.join_comparator.value} "
998-
+ f'fg_{suffix}."{feature_name_in_target}"'
909+
f"\nJOIN fg_{suffix}\n"
910+
+ f'ON fg_base."{feature_group.target_feature_name_in_base}" = '
911+
+ f'fg_{suffix}."{feature_group.record_identifier_feature_name}"'
999912
)
1000913
base_timestamp_cast_function_name = "from_unixtime"
1001914
if self._event_time_identifier_feature_type == FeatureTypeEnum.STRING:

0 commit comments

Comments
 (0)