Skip to content

Commit 6720a19

Browse files
icywang86rui, Rui Wang Napieralski, and ajaykarpur
authored
fix: use iterrows to iterate pandas dataframe (#2011)
On Python versions < 3.7, `DataFrame.itertuples` does not return named tuples when the DataFrame has a large number of columns (> 254); it returns regular tuples instead. See: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.itertuples.html Calling `row._asdict()` fails on regular tuples, so this change switches to `iterrows` to avoid the limitation. Co-authored-by: Rui Wang Napieralski <[email protected]> Co-authored-by: Ajay Karpur <[email protected]>
1 parent b58495e commit 6720a19

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

src/sagemaker/feature_store/feature_group.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,10 +179,10 @@ def _ingest_single_batch(
179179
end_index (int): ending position to ingest in this batch.
180180
"""
181181
logger.info("Started ingesting index %d to %d", start_index, end_index)
182-
for row in data_frame[start_index:end_index].itertuples(index=False):
182+
for _, row in data_frame[start_index:end_index].iterrows():
183183
record = [
184184
FeatureValue(feature_name=name, value_as_string=str(value))
185-
for name, value in row._asdict().items()
185+
for name, value in row.items()
186186
]
187187
sagemaker_session.put_record(
188188
feature_group_name=feature_group_name, record=[value.to_dict() for value in record]

tests/unit/sagemaker/feature_store/test_feature_store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def test_load_feature_definition_unsupported_types(sagemaker_session_mock):
146146
@patch("sagemaker.feature_store.feature_group.IngestionManagerPandas")
147147
def test_ingest(ingestion_manager_init, sagemaker_session_mock):
148148
feature_group = FeatureGroup(name="MyGroup", sagemaker_session=sagemaker_session_mock)
149-
df = pd.DataFrame({"float": pd.Series([2.0], dtype="float64")})
149+
df = pd.DataFrame(dict((f"float{i}", pd.Series([2.0], dtype="float64")) for i in range(300)))
150150

151151
mock_ingestion_manager_instance = Mock()
152152
ingestion_manager_init.return_value = mock_ingestion_manager_instance

0 commit comments

Comments
 (0)