@@ -43,6 +43,36 @@ class TableType(Enum):
43
43
DATA_FRAME = "DataFrame"
44
44
45
45
46
+ @attr .s
47
+ class JoinTypeEnum (Enum ):
48
+ """Enum of Join types.
49
+
50
+ The Join type can be "INNER_JOIN", "LEFT_JOIN", "RIGHT_JOIN", "FULL_JOIN", or "CROSS_JOIN".
51
+ """
52
+
53
+ INNER_JOIN = "JOIN"
54
+ LEFT_JOIN = "LEFT JOIN"
55
+ RIGHT_JOIN = "RIGHT JOIN"
56
+ FULL_JOIN = "FULL JOIN"
57
+ CROSS_JOIN = "CROSS JOIN"
58
+
59
+
60
+ @attr .s
61
+ class JoinComparatorEnum (Enum ):
62
+ """Enum of Join comparators.
63
+
64
+ The Join comparator can be "EQUALS", "GREATER_THAN", "LESS_THAN",
65
+ "GREATER_THAN_OR_EQUAL_TO", "LESS_THAN_OR_EQUAL_TO" or "NOT_EQUAL_TO"
66
+ """
67
+
68
+ EQUALS = "="
69
+ GREATER_THAN = ">"
70
+ GREATER_THAN_OR_EQUAL_TO = ">="
71
+ LESS_THAN = "<"
72
+ LESS_THAN_OR_EQUAL_TO = "<="
73
+ NOT_EQUAL_TO = "<>"
74
+
75
+
46
76
@attr .s
47
77
class FeatureGroupToBeMerged :
48
78
"""FeatureGroup metadata which will be used for SQL join.
@@ -55,19 +85,28 @@ class FeatureGroupToBeMerged:
55
85
Attributes:
56
86
features (List[str]): A list of strings representing feature names of this FeatureGroup.
57
87
included_feature_names (List[str]): A list of strings representing features to be
58
- included in the sql join.
88
+ included in the SQL join.
59
89
projected_feature_names (List[str]): A list of strings representing features to be
60
90
included for final projection in output.
61
91
catalog (str): A string representing the catalog.
62
92
database (str): A string representing the database.
63
93
table_name (str): A string representing the Athena table name of this FeatureGroup.
64
- record_dentifier_feature_name (str): A string representing the record identifier feature.
94
+ record_identifier_feature_name (str): A string representing the record identifier feature.
65
95
event_time_identifier_feature (FeatureDefinition): A FeatureDefinition representing the
66
96
event time identifier feature.
67
97
target_feature_name_in_base (str): A string representing the feature name in base which will
68
98
be used as target join key (default: None).
69
99
table_type (TableType): A TableType representing the type of table if it is Feature Group or
70
100
Panda Data Frame (default: None).
101
+ feature_name_in_target (str): A string representing the feature name in the target feature
102
+ group that will be compared to the target feature in the base feature group.
103
+ If None is provided, the record identifier feature will be used in the
104
+ SQL join. (default: None).
105
+ join_comparator (JoinComparatorEnum): A JoinComparatorEnum representing the comparator
106
+ used when joining the target feature in the base feature group and the feature
107
+ in the target feature group. (default: JoinComparatorEnum.EQUALS).
108
+ join_type (JoinTypeEnum): A JoinTypeEnum representing the type of join between
109
+ the base and target feature groups. (default: JoinTypeEnum.INNER_JOIN).
71
110
"""
72
111
73
112
features : List [str ] = attr .ib ()
@@ -80,12 +119,18 @@ class FeatureGroupToBeMerged:
80
119
event_time_identifier_feature : FeatureDefinition = attr .ib ()
81
120
target_feature_name_in_base : str = attr .ib (default = None )
82
121
table_type : TableType = attr .ib (default = None )
122
+ feature_name_in_target : str = attr .ib (default = None )
123
+ join_comparator : JoinComparatorEnum = attr .ib (default = JoinComparatorEnum .EQUALS )
124
+ join_type : JoinTypeEnum = attr .ib (default = JoinTypeEnum .INNER_JOIN )
83
125
84
126
85
127
def construct_feature_group_to_be_merged (
86
- feature_group : FeatureGroup ,
128
+ target_feature_group : FeatureGroup ,
87
129
included_feature_names : List [str ],
88
130
target_feature_name_in_base : str = None ,
131
+ feature_name_in_target : str = None ,
132
+ join_comparator : JoinComparatorEnum = JoinComparatorEnum .EQUALS ,
133
+ join_type : JoinTypeEnum = JoinTypeEnum .INNER_JOIN ,
89
134
) -> FeatureGroupToBeMerged :
90
135
"""Construct a FeatureGroupToBeMerged object by provided parameters.
91
136
@@ -95,18 +140,29 @@ def construct_feature_group_to_be_merged(
95
140
included in the output.
96
141
target_feature_name_in_base (str): A string representing the feature name in base which
97
142
will be used as target join key (default: None).
143
+ feature_name_in_target (str): A string representing the feature name in the target feature
144
+ group that will be compared to the target feature in the base feature group.
145
+ If None is provided, the record identifier feature will be used in the
146
+ SQL join. (default: None).
147
+ join_comparator (JoinComparatorEnum): A JoinComparatorEnum representing the comparator
148
+ used when joining the target feature in the base feature group and the feature
149
+ in the target feature group. (default: JoinComparatorEnum.EQUALS).
150
+ join_type (JoinTypeEnum): A JoinTypeEnum representing the type of join between
151
+ the base and target feature groups. (default: JoinTypeEnum.INNER_JOIN).
98
152
Returns:
99
153
A FeatureGroupToBeMerged object.
100
154
101
155
Raises:
102
156
ValueError: Invalid feature name(s) in included_feature_names.
103
157
"""
104
- feature_group_metadata = feature_group .describe ()
158
+ feature_group_metadata = target_feature_group .describe ()
105
159
data_catalog_config = feature_group_metadata .get ("OfflineStoreConfig" , {}).get (
106
160
"DataCatalogConfig" , None
107
161
)
108
162
if not data_catalog_config :
109
- raise RuntimeError (f"No metastore is configured with FeatureGroup { feature_group .name } ." )
163
+ raise RuntimeError (
164
+ f"No metastore is configured with FeatureGroup { target_feature_group .name } ."
165
+ )
110
166
111
167
record_identifier_feature_name = feature_group_metadata .get ("RecordIdentifierFeatureName" , None )
112
168
feature_definitions = feature_group_metadata .get ("FeatureDefinitions" , [])
@@ -126,10 +182,15 @@ def construct_feature_group_to_be_merged(
126
182
catalog = data_catalog_config .get ("Catalog" , None ) if disable_glue else _DEFAULT_CATALOG
127
183
features = [feature .get ("FeatureName" , None ) for feature in feature_definitions ]
128
184
185
+ if feature_name_in_target is not None and feature_name_in_target not in features :
186
+ raise ValueError (
187
+ f"Feature { feature_name_in_target } not found in FeatureGroup { target_feature_group .name } "
188
+ )
189
+
129
190
for included_feature in included_feature_names or []:
130
191
if included_feature not in features :
131
192
raise ValueError (
132
- f"Feature { included_feature } not found in FeatureGroup { feature_group .name } "
193
+ f"Feature { included_feature } not found in FeatureGroup { target_feature_group .name } "
133
194
)
134
195
if not included_feature_names :
135
196
included_feature_names = features
@@ -151,6 +212,9 @@ def construct_feature_group_to_be_merged(
151
212
FeatureDefinition (event_time_identifier_feature_name , event_time_identifier_feature_type ),
152
213
target_feature_name_in_base ,
153
214
TableType .FEATURE_GROUP ,
215
+ feature_name_in_target ,
216
+ join_comparator ,
217
+ join_type ,
154
218
)
155
219
156
220
@@ -236,21 +300,38 @@ def with_feature_group(
236
300
feature_group : FeatureGroup ,
237
301
target_feature_name_in_base : str = None ,
238
302
included_feature_names : List [str ] = None ,
303
+ feature_name_in_target : str = None ,
304
+ join_comparator : JoinComparatorEnum = JoinComparatorEnum .EQUALS ,
305
+ join_type : JoinTypeEnum = JoinTypeEnum .INNER_JOIN ,
239
306
):
240
307
"""Join FeatureGroup with base.
241
308
242
309
Args:
243
- feature_group (FeatureGroup): A FeatureGroup which will be joined to base.
310
+ feature_group (FeatureGroup): A target FeatureGroup which will be joined to base.
244
311
target_feature_name_in_base (str): A string representing the feature name in base which
245
- will be used as target join key (default: None).
312
+ will be used as a join key (default: None).
246
313
included_feature_names (List[str]): A list of strings representing features to be
247
314
included in the output (default: None).
248
- Returns:
249
- This DatasetBuilder object.
315
+ feature_name_in_target (str): A string representing the feature name in the target
316
+ feature group that will be compared to the target feature in the base feature group.
317
+ If None is provided, the record identifier feature will be used in the
318
+ SQL join. (default: None).
319
+ join_comparator (JoinComparatorEnum): A JoinComparatorEnum representing the comparator
320
+ used when joining the target feature in the base feature group and the feature
321
+ in the target feature group. (default: JoinComparatorEnum.EQUALS).
322
+ join_type (JoinTypeEnum): A JoinTypeEnum representing the type of join between
323
+ the base and target feature groups. (default: JoinTypeEnum.INNER_JOIN).
324
+ Returns:
325
+ This DatasetBuilder object.
250
326
"""
251
327
self ._feature_groups_to_be_merged .append (
252
328
construct_feature_group_to_be_merged (
253
- feature_group , included_feature_names , target_feature_name_in_base
329
+ feature_group ,
330
+ included_feature_names ,
331
+ target_feature_name_in_base ,
332
+ feature_name_in_target ,
333
+ join_comparator ,
334
+ join_type ,
254
335
)
255
336
)
256
337
return self
@@ -914,10 +995,18 @@ def _construct_join_condition(self, feature_group: FeatureGroupToBeMerged, suffi
914
995
Returns:
915
996
The JOIN query string.
916
997
"""
998
+
999
+ feature_name_in_target = (
1000
+ feature_group .feature_name_in_target
1001
+ if feature_group .feature_name_in_target is not None
1002
+ else feature_group .record_identifier_feature_name
1003
+ )
1004
+
917
1005
join_condition_string = (
918
- f"\n JOIN fg_{ suffix } \n "
919
- + f'ON fg_base."{ feature_group .target_feature_name_in_base } " = '
920
- + f'fg_{ suffix } ."{ feature_group .record_identifier_feature_name } "'
1006
+ f"\n { feature_group .join_type .value } fg_{ suffix } \n "
1007
+ + f'ON fg_base."{ feature_group .target_feature_name_in_base } "'
1008
+ + f" { feature_group .join_comparator .value } "
1009
+ + f'fg_{ suffix } ."{ feature_name_in_target } "'
921
1010
)
922
1011
base_timestamp_cast_function_name = "from_unixtime"
923
1012
if self ._event_time_identifier_feature_type == FeatureTypeEnum .STRING :
0 commit comments