@@ -43,6 +43,32 @@ class TableType(Enum):
43
43
DATA_FRAME = "DataFrame"
44
44
45
45
46
class JoinTypeEnum(Enum):
    """Enum of Join types.

    The Join type can be "INNER_JOIN", "LEFT_JOIN", "RIGHT_JOIN", or "FULL_JOIN".
    Each member's value is the SQL join keyword text for that join type.
    """

    # NOTE: deliberately NOT decorated with ``@attr.s``. With no ``attr.ib``
    # fields, attrs would generate a field-less ``__eq__`` (every member
    # compares equal to every other), set ``__hash__`` to None (members could
    # not be used as dict keys or set items), and replace the informative Enum
    # repr. A plain Enum already provides correct identity-based equality,
    # hashing, and repr.
    INNER_JOIN = "JOIN"
    LEFT_JOIN = "LEFT JOIN"
    RIGHT_JOIN = "RIGHT JOIN"
    FULL_JOIN = "FULL JOIN"
56
+
57
+
58
class JoinComparatorEnum(Enum):
    """Enum of Join comparators.

    The Join comparator can be "EQUALS", "GREATER_THAN", "LESS_THAN",
    "GREATER_THAN_OR_EQUAL_TO", or "LESS_THAN_OR_EQUAL_TO".
    Each member's value is the corresponding SQL comparison operator.
    """

    # NOTE: deliberately NOT decorated with ``@attr.s``. With no ``attr.ib``
    # fields, attrs would generate a field-less ``__eq__`` (every member
    # compares equal to every other), set ``__hash__`` to None (members could
    # not be used as dict keys or set items), and replace the informative Enum
    # repr. A plain Enum already provides correct identity-based equality,
    # hashing, and repr.
    EQUALS = "="
    GREATER_THAN = ">"
    GREATER_THAN_OR_EQUAL_TO = ">="
    LESS_THAN = "<"
    LESS_THAN_OR_EQUAL_TO = "<="
70
+
71
+
46
72
@attr .s
47
73
class FeatureGroupToBeMerged :
48
74
"""FeatureGroup metadata which will be used for SQL join.
@@ -68,6 +94,13 @@ class FeatureGroupToBeMerged:
68
94
be used as target join key (default: None).
69
95
table_type (TableType): A TableType representing the type of table if it is Feature Group or
70
96
Panda Data Frame (default: None).
97
+ feature_name_in_target (str): A string representing the feature in the target feature group
98
+ that will be compared to the target feature in the base feature group (default: None).
99
+ join_comparator (JoinComparatorEnum): A JoinComparatorEnum representing the comparator used
100
+ when joining the target feature in the base feature group and the feature in the target
101
+ feature group (default: None).
102
+ join_type (JoinTypeEnum): A JoinTypeEnum representing the type of join between the base and
103
+ target feature groups (default: None).
71
104
"""
72
105
73
106
features : List [str ] = attr .ib ()
@@ -80,12 +113,18 @@ class FeatureGroupToBeMerged:
80
113
event_time_identifier_feature : FeatureDefinition = attr .ib ()
81
114
target_feature_name_in_base : str = attr .ib (default = None )
82
115
table_type : TableType = attr .ib (default = None )
116
+ feature_name_in_target : str = attr .ib (default = None )
117
+ join_comparator : JoinComparatorEnum = attr .ib (default = None )
118
+ join_type : JoinTypeEnum = attr .ib (default = None )
83
119
84
120
85
121
def construct_feature_group_to_be_merged (
86
- feature_group : FeatureGroup ,
122
+ target_feature_group : FeatureGroup ,
87
123
included_feature_names : List [str ],
88
124
target_feature_name_in_base : str = None ,
125
+ feature_name_in_target : str = None ,
126
+ join_comparator : JoinComparatorEnum = None ,
127
+ join_type : JoinTypeEnum = None
89
128
) -> FeatureGroupToBeMerged :
90
129
"""Construct a FeatureGroupToBeMerged object by provided parameters.
91
130
@@ -101,12 +140,12 @@ def construct_feature_group_to_be_merged(
101
140
Raises:
102
141
ValueError: Invalid feature name(s) in included_feature_names.
103
142
"""
104
- feature_group_metadata = feature_group .describe ()
143
+ feature_group_metadata = target_feature_group .describe ()
105
144
data_catalog_config = feature_group_metadata .get ("OfflineStoreConfig" , {}).get (
106
145
"DataCatalogConfig" , None
107
146
)
108
147
if not data_catalog_config :
109
- raise RuntimeError (f"No metastore is configured with FeatureGroup { feature_group .name } ." )
148
+ raise RuntimeError (f"No metastore is configured with FeatureGroup { target_feature_group .name } ." )
110
149
111
150
record_identifier_feature_name = feature_group_metadata .get ("RecordIdentifierFeatureName" , None )
112
151
feature_definitions = feature_group_metadata .get ("FeatureDefinitions" , [])
@@ -126,10 +165,15 @@ def construct_feature_group_to_be_merged(
126
165
catalog = data_catalog_config .get ("Catalog" , None ) if disable_glue else _DEFAULT_CATALOG
127
166
features = [feature .get ("FeatureName" , None ) for feature in feature_definitions ]
128
167
168
+ if (feature_name_in_target is not None and feature_name_in_target not in features ):
169
+ raise ValueError (
170
+ f"Feature { feature_name_in_target } not found in FeatureGroup { target_feature_group .name } "
171
+ )
172
+
129
173
for included_feature in included_feature_names or []:
130
174
if included_feature not in features :
131
175
raise ValueError (
132
- f"Feature { included_feature } not found in FeatureGroup { feature_group .name } "
176
+ f"Feature { included_feature } not found in FeatureGroup { target_feature_group .name } "
133
177
)
134
178
if not included_feature_names :
135
179
included_feature_names = features
@@ -151,6 +195,9 @@ def construct_feature_group_to_be_merged(
151
195
FeatureDefinition (event_time_identifier_feature_name , event_time_identifier_feature_type ),
152
196
target_feature_name_in_base ,
153
197
TableType .FEATURE_GROUP ,
198
+ feature_name_in_target ,
199
+ join_comparator ,
200
+ join_type
154
201
)
155
202
156
203
@@ -227,6 +274,9 @@ def with_feature_group(
227
274
feature_group : FeatureGroup ,
228
275
target_feature_name_in_base : str = None ,
229
276
included_feature_names : List [str ] = None ,
277
+ feature_name_in_target : str = None ,
278
+ join_comparator : JoinComparatorEnum = None ,
279
+ join_type : JoinTypeEnum = None
230
280
):
231
281
"""Join FeatureGroup with base.
232
282
@@ -241,7 +291,11 @@ def with_feature_group(
241
291
"""
242
292
self ._feature_groups_to_be_merged .append (
243
293
construct_feature_group_to_be_merged (
244
- feature_group , included_feature_names , target_feature_name_in_base
294
+ feature_group , included_feature_names ,
295
+ target_feature_name_in_base ,
296
+ feature_name_in_target ,
297
+ join_comparator ,
298
+ join_type
245
299
)
246
300
)
247
301
return self
@@ -905,10 +959,22 @@ def _construct_join_condition(self, feature_group: FeatureGroupToBeMerged, suffi
905
959
Returns:
906
960
The JOIN query string.
907
961
"""
962
+
963
+ join_type = (feature_group .join_type if feature_group .join_type is not None
964
+ else JoinTypeEnum .INNER_JOIN )
965
+
966
+ join_comparator = (feature_group .join_comparator
967
+ if feature_group .join_comparator is not None
968
+ else JoinComparatorEnum .EQUALS )
969
+
970
+ feature_name_in_target = (feature_group .feature_name_in_target
971
+ if feature_group .feature_name_in_target is not None
972
+ else feature_group .record_identifier_feature_name )
973
+
908
974
join_condition_string = (
909
- f"\n JOIN fg_{ suffix } \n "
910
- + f'ON fg_base."{ feature_group .target_feature_name_in_base } " = '
911
- + f'fg_{ suffix } ."{ feature_group . record_identifier_feature_name } "'
975
+ f"\n { join_type . value } fg_{ suffix } \n "
976
+ + f'ON fg_base."{ feature_group .target_feature_name_in_base } " { join_comparator . value } '
977
+ + f'fg_{ suffix } ."{ feature_name_in_target } "'
912
978
)
913
979
base_timestamp_cast_function_name = "from_unixtime"
914
980
if self ._event_time_identifier_feature_type == FeatureTypeEnum .STRING :
0 commit comments