Skip to content

Commit 3ebb29f

Browse files
author
Rui Wang Napieralski
committed
fix: add service-inserted fields to generated Hive DDL
The FeatureStore service adds some fields to the DDL when it creates the Glue table for the FeatureGroup. This commit adds those fields to the generated DDL so that it is consistent with the service.
1 parent ed82027 commit 3ebb29f

File tree

3 files changed

+13
-2
lines changed

3 files changed

+13
-2
lines changed

src/sagemaker/feature_store/feature_group.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,9 @@ def as_hive_ddl(self, database: str = "sagemaker_featurestore", table_name: str
508508
f" {definition.feature_name} "
509509
f"{self._FEATURE_TYPE_TO_DDL_DATA_TYPE_MAP.get(definition.feature_type.value)}\n"
510510
)
511+
ddl += " write_time TIMESTAMP\n"
512+
ddl += " event_time TIMESTAMP\n"
513+
ddl += " is_deleted BOOLEAN\n"
511514
ddl += ")\n"
512515
ddl += (
513516
"ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'\n"

tests/integ/test_feature_store.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,11 @@ def create_table_ddl():
131131
"CREATE EXTERNAL TABLE IF NOT EXISTS sagemaker_featurestore.{feature_group_name} (\n"
132132
" feature1 FLOAT\n"
133133
" feature2 INT\n"
134-
" feature3 STRING\n)\n"
134+
" feature3 STRING\n"
135+
" write_time TIMESTAMP\n"
136+
" event_time TIMESTAMP\n"
137+
" is_deleted BOOLEAN\n"
138+
")\n"
135139
"ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'\n"
136140
" STORED AS\n"
137141
" INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat'\n"

tests/unit/sagemaker/feature_store/test_feature_store.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,11 @@ def create_table_ddl():
5757
"CREATE EXTERNAL TABLE IF NOT EXISTS {database}.{table_name} (\n"
5858
" feature1 FLOAT\n"
5959
" feature2 INT\n"
60-
" feature3 STRING\n)\n"
60+
" feature3 STRING\n"
61+
" write_time TIMESTAMP\n"
62+
" event_time TIMESTAMP\n"
63+
" is_deleted BOOLEAN\n"
64+
")\n"
6165
"ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'\n"
6266
" STORED AS\n"
6367
" INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat'\n"

0 commit comments

Comments
 (0)