-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
Add new optional "separator" argument to json_normalize #14891
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
457019b
c345d6d
def361d
fac9ac1
5f777f4
992dfbc
2083f0d
d1b1720
e7df751
2566223
6f4e36a
dd8cba2
73bc6cf
f5c8d54
e80a2b9
906b51a
bdbebc4
e503d40
f3c5a42
37b22c7
a718962
f1cfe5b
8b98104
8c798c0
dc4b070
39efbbc
0ac3d98
f11501a
8e630b6
3ccb501
5faf32a
b35c689
0c52813
3ab0e55
02906ce
50930a9
24fb26d
708792a
3ab369c
1678f14
4c3d4d4
0a7cd97
07c83ee
73e2829
f79bc7a
a06e32a
dcc4632
2363314
8e0faa8
521720d
74c4285
8b72b12
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,8 +24,8 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', | |
default_handler=None, lines=False): | ||
|
||
if lines and orient != 'records': | ||
raise ValueError( | ||
"'lines' keyword only valid when 'orient' is records") | ||
raise ValueError( | ||
"'lines' keyword only valid when 'orient' is records") | ||
|
||
if isinstance(obj, Series): | ||
s = SeriesWriter( | ||
|
@@ -726,8 +726,8 @@ def nested_to_record(ds, prefix="", level=0): | |
def json_normalize(data, record_path=None, meta=None, | ||
meta_prefix=None, | ||
record_prefix=None, | ||
separator='.', | ||
errors='raise'): | ||
|
||
""" | ||
"Normalize" semi-structured JSON data into a flat table | ||
|
||
|
@@ -744,6 +744,9 @@ def json_normalize(data, record_path=None, meta=None, | |
If True, prefix records with dotted (?) path, e.g. foo.bar.field if | ||
path to records is ['foo', 'bar'] | ||
meta_prefix : string, default None | ||
separator : string, default '.' | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a […]? Also, might be better to make […]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Call it `sep`, and add a versionadded tag. |
||
Nested records will generate names separated by separator, | ||
e.g., for separator='.', { 'foo' : { 'bar' : 0 } } -> foo.bar | ||
errors : {'raise', 'ignore'}, default 'raise' | ||
* ignore : will ignore KeyError if keys listed in meta are not | ||
always present | ||
|
@@ -828,7 +831,7 @@ def _pull_field(js, spec): | |
lengths = [] | ||
|
||
meta_vals = defaultdict(list) | ||
meta_keys = ['.'.join(val) for val in meta] | ||
meta_keys = [separator.join(val) for val in meta] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. validate whether |
||
|
||
def _recursive_extract(data, path, seen_meta, level=0): | ||
if len(path) > 1: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -133,6 +133,36 @@ def test_shallow_nested(self): | |
expected = DataFrame(ex_data, columns=result.columns) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_shallow_nested_with_separator(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this test can be simplified a lot. Could you do something like result = json_normalize({"A": {"A": 1, "B": 2}}, separator='_')
expected = pd.DataFrame([[1, 2]], columns={"A_A", "A_B"})
assert_frame_equal(result, expected) That way you're directly testing your change. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also add test with the default separator, and ensure that the columns are |
||
data = [{'state': 'Florida', | ||
'shortname': 'FL', | ||
'info': { | ||
'governor': 'Rick Scott' | ||
}, | ||
'counties': [{'name': 'Dade', 'population': 12345}, | ||
{'name': 'Broward', 'population': 40000}, | ||
{'name': 'Palm Beach', 'population': 60000}]}, | ||
{'state': 'Ohio', | ||
'shortname': 'OH', | ||
'info': { | ||
'governor': 'John Kasich' | ||
}, | ||
'counties': [{'name': 'Summit', 'population': 1234}, | ||
{'name': 'Cuyahoga', 'population': 1337}]}] | ||
|
||
result = json_normalize(data, 'counties', | ||
['state', 'shortname', | ||
['info', 'governor']], | ||
separator='_') | ||
ex_data = {'name': ['Dade', 'Broward', 'Palm Beach', 'Summit', | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you also add unicode tests? |
||
'Cuyahoga'], | ||
'state': ['Florida'] * 3 + ['Ohio'] * 2, | ||
'shortname': ['FL', 'FL', 'FL', 'OH', 'OH'], | ||
'info_governor': ['Rick Scott'] * 3 + ['John Kasich'] * 2, | ||
'population': [12345, 40000, 60000, 1234, 1337]} | ||
expected = DataFrame(ex_data, columns=result.columns) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_meta_name_conflict(self): | ||
data = [{'foo': 'hello', | ||
'bar': 'there', | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about, ...gained `separator` option which accepts `str`, default is `"."`