Skip to content

Commit 04785e0

Browse files
committed
BUG: fix nested meta path bug in json_normalize (GH 27220)
1 parent f9d4319 commit 04785e0

File tree

3 files changed

+19
-11
lines changed

3 files changed

+19
-11
lines changed

doc/source/whatsnew/v0.25.1.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ MultiIndex
107107
I/O
108108
^^^
109109

110-
-
110+
- Fix bug in :meth:`io.json.json_normalize` when nested meta paths are used with a nested record_path (:issue:`27220`)
111111
-
112112
-
113113

pandas/io/json/_normalize.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,8 @@ def _recursive_extract(data, path, seen_meta, level=0):
288288
if len(path) > 1:
289289
for obj in data:
290290
for val, key in zip(meta, meta_keys):
291+
# Pull value for all the keys in case meta path and
292+
# record path are on two branches
291293
seen_meta[key] = _pull_field(obj, val[0])
292294

293295
_recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1)
@@ -304,15 +306,24 @@ def _recursive_extract(data, path, seen_meta, level=0):
304306
# For repeating the metadata later
305307
lengths.append(len(recs))
306308
for val, key in zip(meta, meta_keys):
309+
# Extract the value of the key when the level
310+
# is at the meta path end
307311
if level + 1 > len(val):
308312
meta_val = seen_meta[key]
309313
meta_vals[key].append(meta_val)
314+
# Extract the value of the key from seen_meta when
315+
# meta path and record path are on two branches
310316
elif seen_meta:
311317
meta_val = seen_meta[key]
312-
if isinstance(meta_val, list):
313-
meta_vals[key].append(meta_val[ind][val[level]])
314-
else:
315-
meta_vals[key].append(meta_val[val[level]])
318+
meta_vals[key] += [
319+
# The list case
320+
meta_val[ind][val[level]]
321+
if isinstance(meta_val, list)
322+
# The dict case
323+
else meta_val[val[level]]
324+
]
325+
# At top level, seen_meta is empty, pull from data
326+
# directly and raise KeyError if not found
316327
else:
317328
try:
318329
meta_val = _pull_field(obj, val[level:])

pandas/tests/io/json/test_normalize.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -304,13 +304,10 @@ def test_nested_meta_path_with_nested_record_path(self, state_data):
304304
"state": ["Florida"] * 21 + ["Ohio"] * 14,
305305
"shortname": ["FL"] * 21 + ["OH"] * 14,
306306
"info.governor": ["Rick Scott"] * 21 + ["John Kasich"] * 14,
307-
"population": [12345] * 4
308-
+ [40000] * 7
309-
+ [60000] * 10
310-
+ [1234] * 6
311-
+ [1337] * 8,
312307
}
313-
expected = DataFrame(ex_data, columns=result.columns)
308+
expected = DataFrame(
309+
ex_data, columns=[0, "state", "shortname", "info.governor"]
310+
)
314311
tm.assert_frame_equal(result, expected)
315312

316313
def test_meta_name_conflict(self):

0 commit comments

Comments
 (0)