-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
BUG: fix nested meta path bug (GH 27220) #27667
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
383f0e6
2fed9a8
f9d4319
04785e0
2a695e9
a1efe7e
9c568b6
1c186aa
9efda32
62ee093
4bdaf01
14996f6
cf53749
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -288,12 +288,14 @@ def _recursive_extract(data, path, seen_meta, level=0): | |
if len(path) > 1: | ||
for obj in data: | ||
for val, key in zip(meta, meta_keys): | ||
if level + 1 == len(val): | ||
seen_meta[key] = _pull_field(obj, val[-1]) | ||
|
||
# Pull value for all the keys in case meta path and | ||
# record path are on two branches | ||
seen_meta[key] = _pull_field(obj, val[0]) | ||
|
||
_recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1) | ||
else: | ||
for obj in data: | ||
for ind, obj in enumerate(data): | ||
recs = _pull_field(obj, path[0]) | ||
recs = [ | ||
nested_to_record(r, sep=sep, max_level=max_level) | ||
|
@@ -305,8 +307,26 @@ def _recursive_extract(data, path, seen_meta, level=0): | |
# For repeating the metadata later | ||
lengths.append(len(recs)) | ||
for val, key in zip(meta, meta_keys): | ||
|
||
# Extract the value of the key when the level | ||
# is at the meta path end | ||
if level + 1 > len(val): | ||
meta_val = seen_meta[key] | ||
|
||
# Extract the value of the key from seen_meta when | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you put a blank line before these comments (and below and above)? Basically, it is easier to read if they are paragraph-like. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Definitely. Will update this tomorrow. |
||
# meta path and record path are on two branches | ||
elif seen_meta: | ||
meta_val_obj = seen_meta[key] | ||
|
||
# Both the list case and the dict case are covered | ||
meta_val = ( | ||
meta_val_obj[ind][val[level]] | ||
if isinstance(meta_val_obj, list) | ||
else meta_val_obj[val[level]] | ||
) | ||
|
||
# At top level, seen_meta is empty, pull from data | ||
# directly and raise KeyError if not found | ||
else: | ||
try: | ||
meta_val = _pull_field(obj, val[level:]) | ||
|
@@ -320,6 +340,7 @@ def _recursive_extract(data, path, seen_meta, level=0): | |
"{err} is not always present".format(err=e) | ||
) | ||
meta_vals[key].append(meta_val) | ||
|
||
records.extend(recs) | ||
|
||
_recursive_extract(data, record_path, {}, level=0) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -287,6 +287,31 @@ def test_shallow_nested(self): | |
expected = DataFrame(ex_data, columns=result.columns) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
@pytest.mark.skipif(not PY36, reason="drop support for 3.5 soon") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think this is needed if you follow the comment below. |
||
def test_nested_meta_path_with_nested_record_path(self, state_data): | ||
# GH 27220 | ||
result = json_normalize( | ||
data=state_data, | ||
record_path=["counties", "name"], | ||
meta=["state", "shortname", ["info", "governor"]], | ||
errors="ignore", | ||
) | ||
|
||
ex_data = [ | ||
[ | ||
i | ||
for word in ["Dade", "Broward", "Palm Beach", "Summit", "Cuyahoga"] | ||
for i in word | ||
], | ||
["Florida"] * 21 + ["Ohio"] * 14, | ||
["FL"] * 21 + ["OH"] * 14, | ||
["Rick Scott"] * 21 + ["John Kasich"] * 14, | ||
] | ||
expected = DataFrame(ex_data).T | ||
expected.columns = [0, "state", "shortname", "info.governor"] | ||
|
||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_meta_name_conflict(self): | ||
data = [ | ||
{ | ||
|
Uh oh!
There was an error while loading. Please reload this page.