31
31
32
32
def get_session_from_role (region : str , assume_role : str = None ) -> Session :
33
33
"""Method use to get the :class:`sagemaker.session.Session` from a role and a region.
34
- Helpful in case it's invoke from a session with a role without permission it can assume
35
- another role temporarily to perform certain tasks.
34
+
35
+ Description:
36
+ Helpful in case it's invoked from a session with a role without permission, as it can assume
37
+ another role temporarily to perform certain tasks.
38
+
36
39
Args:
37
40
assume_role: role name
38
41
region: region name
42
+
39
43
Returns:
40
44
"""
41
45
boto_session = boto3 .Session (region_name = region )
@@ -74,24 +78,26 @@ def get_session_from_role(region: str, assume_role: str = None) -> Session:
74
78
75
79
76
80
def get_feature_group_as_dataframe (
77
- feature_group_name : str ,
78
- athena_bucket : str ,
79
- query : str = """SELECT * FROM "sagemaker_featurestore"."#{table}"
80
- WHERE is_deleted=False """ ,
81
- role : str = None ,
82
- region : str = None ,
83
- session = None ,
84
- event_time_feature_name : str = None ,
85
- latest_ingestion : bool = True ,
86
- verbose : bool = True ,
87
- ** pandas_read_csv_kwargs ,
81
+ feature_group_name : str ,
82
+ athena_bucket : str ,
83
+ query : str = """SELECT * FROM "sagemaker_featurestore"."#{table}"
84
+ WHERE is_deleted=False """ ,
85
+ role : str = None ,
86
+ region : str = None ,
87
+ session = None ,
88
+ event_time_feature_name : str = None ,
89
+ latest_ingestion : bool = True ,
90
+ verbose : bool = True ,
91
+ ** pandas_read_csv_kwargs ,
88
92
) -> DataFrame :
89
93
"""Get a :class:`sagemaker.feature_store.feature_group.FeatureGroup` as a pandas.DataFrame
94
+
90
95
Description:
91
96
Method to run an athena query over a Feature Group in a Feature Store
92
97
to retrieve its data. It needs the sagemaker.Session linked to a role
93
98
or the role and region used to work with Feature Stores. Returns a dataframe
94
99
with the data.
100
+
95
101
Args:
96
102
region (str): region of the target Feature Store
97
103
feature_group_name (str): feature store name
@@ -110,6 +116,7 @@ def get_feature_group_as_dataframe(
110
116
If False it will take whatever is specified in the query, or
111
117
if not specified, it will get all the data that wasn't deleted.
112
118
verbose (bool): if True show messages, if False is silent.
119
+
113
120
Returns:
114
121
dataset (pandas.DataFrame): dataset with the data retrieved from feature group
115
122
"""
@@ -121,8 +128,8 @@ def get_feature_group_as_dataframe(
121
128
if latest_ingestion :
122
129
if event_time_feature_name is not None :
123
130
query += str (
124
- f"AND { event_time_feature_name } =(SELECT " +
125
- f"MAX({ event_time_feature_name } ) FROM " +
131
+ f"AND { event_time_feature_name } =(SELECT "
132
+ f"MAX({ event_time_feature_name } ) FROM "
126
133
'"sagemaker_featurestore"."#{table}")'
127
134
)
128
135
else :
@@ -169,11 +176,14 @@ def get_feature_group_as_dataframe(
169
176
170
177
def _format_column_names (data : pandas .DataFrame ) -> pandas .DataFrame :
171
178
"""Formats the column names for :class:`sagemaker.feature_store.feature_group.FeatureGroup`
179
+
172
180
Description:
173
181
Function to correctly format the names of the columns of a DataFrame
174
182
to later generate the feature names of a Feature Group
183
+
175
184
Args:
176
185
data (pandas.DataFrame): dataframe used
186
+
177
187
Returns:
178
188
pandas.DataFrame
179
189
"""
@@ -183,8 +193,11 @@ def _format_column_names(data: pandas.DataFrame) -> pandas.DataFrame:
183
193
184
194
def _cast_object_to_string (data_frame : pandas .DataFrame ) -> pandas .DataFrame :
185
195
"""Cast properly pandas object types to strings
186
- Method to convert 'object' and 'O' column dtypes of a pandas.DataFrame to
187
- a valid string type recognized by Feature Groups.
196
+
197
+ Description:
198
+ Method to convert 'object' and 'O' column dtypes of a pandas.DataFrame to
199
+ a valid string type recognized by Feature Groups.
200
+
188
201
Args:
189
202
data_frame: dataframe used
190
203
Returns:
@@ -196,23 +209,25 @@ def _cast_object_to_string(data_frame: pandas.DataFrame) -> pandas.DataFrame:
196
209
197
210
198
211
def prepare_fg_from_dataframe_or_file (
199
- dataframe_or_path : Union [str , Path , pandas .DataFrame ],
200
- feature_group_name : str ,
201
- role : str = None ,
202
- region : str = None ,
203
- session = None ,
204
- record_id : str = "record_id" ,
205
- event_id : str = "data_as_of_date" ,
206
- verbose : bool = False ,
207
- ** pandas_read_csv_kwargs
212
+ dataframe_or_path : Union [str , Path , pandas .DataFrame ],
213
+ feature_group_name : str ,
214
+ role : str = None ,
215
+ region : str = None ,
216
+ session = None ,
217
+ record_id : str = "record_id" ,
218
+ event_id : str = "data_as_of_date" ,
219
+ verbose : bool = False ,
220
+ ** pandas_read_csv_kwargs ,
208
221
) -> FeatureGroup :
209
222
"""Prepares a dataframe to create a :class:`sagemaker.feature_store.feature_group.FeatureGroup`
223
+
210
224
Description:
211
225
Function to prepare a dataframe for creating a Feature Group from a pandas.DataFrame
212
226
or a path to a file with proper dtypes, feature names and mandatory features (record_id,
213
227
event_id). It needs the sagemaker.Session linked to a role or the role and region used
214
228
to work with Feature Stores. If record_id or event_id are not specified it will create them
215
229
by default with the names 'record_id' and 'data_as_of_date'.
230
+
216
231
Args:
217
232
**pandas_read_csv_kwargs (object):
218
233
feature_group_name (str): feature group name
@@ -228,6 +243,7 @@ def prepare_fg_from_dataframe_or_file(
228
243
role (str) : role used to get the session.
229
244
region (str) : region used to get the session.
230
245
session (str): session of SageMaker used to work with the feature store
246
+
231
247
Returns:
232
248
:class:`sagemaker.feature_store.feature_group.FeatureGroup`: FG prepared with all
233
249
the methods and definitions properly defined
0 commit comments