Skip to content

Commit c4dff7c

Browse files
author
Balaji Veeramani
committed
Add JSON Lines serializer
1 parent be1deba commit c4dff7c

File tree

2 files changed

+80
-0
lines changed

2 files changed

+80
-0
lines changed

src/sagemaker/serializers.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,34 @@ def serialize(self, data):
192192
return json.dumps(data)
193193

194194

195+
class JSONLinesSerializer(BaseSerializer):
    """Serializer that emits JSON Lines text: one JSON value per line."""

    CONTENT_TYPE = "application/jsonlines"

    def serialize(self, data):
        """Turn ``data`` into a JSON Lines payload.

        Args:
            data (object): The value to serialize. Supported inputs are a
                string (handed back unchanged), a list whose elements are
                each JSON serializable, or an object exposing ``read()``.

        Returns:
            str: JSON values separated by newlines. For an object with a
                ``read()`` method, the result of ``read()`` is returned
                unmodified.

        Raises:
            ValueError: If ``data`` is not a string, a list, or an object
                with a ``read`` attribute.
        """
        # Strings are taken to be already formatted, so no validation is done.
        if isinstance(data, str):
            return data

        # Each list element becomes one line of the output.
        if isinstance(data, list):
            encoded_rows = [json.dumps(row) for row in data]
            return "\n".join(encoded_rows)

        # Anything readable is drained and passed through verbatim.
        if hasattr(data, "read"):
            contents = data.read()
            return contents

        raise ValueError("Object of type %s is not JSON Lines serializable." % type(data))
221+
222+
195223
class SparseMatrixSerializer(BaseSerializer):
196224
"""Serialize a sparse matrix to a buffer using the .npz format."""
197225

tests/unit/sagemaker/test_serializers.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
NumpySerializer,
2626
JSONSerializer,
2727
SparseMatrixSerializer,
28+
JSONLinesSerializer,
2829
)
2930
from tests.unit import DATA_DIR
3031

@@ -235,6 +236,57 @@ def test_json_serializer_csv_buffer(json_serializer):
235236
assert result == validation_value
236237

237238

239+
@pytest.fixture
def json_lines_serializer():
    """Return a new ``JSONLinesSerializer`` instance for use as a test fixture."""
    serializer = JSONLinesSerializer()
    return serializer
242+
243+
244+
@pytest.mark.parametrize(
    "payload, expected",
    [
        ('["Name", "Score"]\n["Gilbert", 24]', '["Name", "Score"]\n["Gilbert", 24]'),
        (
            '{"Name": "Gilbert", "Score": 24}\n{"Name": "Alexa", "Score": 29}',
            '{"Name": "Gilbert", "Score": 24}\n{"Name": "Alexa", "Score": 29}',
        ),
    ],
)
def test_json_lines_serializer_string(json_lines_serializer, payload, expected):
    """Check that a string payload comes back from ``serialize`` unchanged."""
    # The argument is named ``payload`` rather than ``input`` so the builtin
    # ``input`` is not shadowed inside the test.
    actual = json_lines_serializer.serialize(payload)
    assert actual == expected
257+
258+
259+
@pytest.mark.parametrize(
    "payload, expected",
    [
        ([["Name", "Score"], ["Gilbert", 24]], '["Name", "Score"]\n["Gilbert", 24]'),
        (
            [{"Name": "Gilbert", "Score": 24}, {"Name": "Alexa", "Score": 29}],
            '{"Name": "Gilbert", "Score": 24}\n{"Name": "Alexa", "Score": 29}',
        ),
    ],
)
def test_json_lines_serializer_list(json_lines_serializer, payload, expected):
    """Check that a list is serialized as one JSON value per line."""
    # The argument is named ``payload`` rather than ``input`` so the builtin
    # ``input`` is not shadowed inside the test.
    actual = json_lines_serializer.serialize(payload)
    assert actual == expected
272+
273+
274+
@pytest.mark.parametrize(
    "source, expected",
    [
        ('["Name", "Score"]\n["Gilbert", 24]', '["Name", "Score"]\n["Gilbert", 24]'),
        (
            '{"Name": "Gilbert", "Score": 24}\n{"Name": "Alexa", "Score": 29}',
            '{"Name": "Gilbert", "Score": 24}\n{"Name": "Alexa", "Score": 29}',
        ),
    ],
)
def test_json_lines_serializer_file_like(json_lines_serializer, source, expected):
    """Check that the contents of a file-like object are returned by ``serialize``."""
    # Wrap the text in an in-memory stream; the local is called ``stream``
    # rather than ``input`` so the builtin ``input`` is not shadowed.
    stream = io.StringIO(source)
    actual = json_lines_serializer.serialize(stream)
    assert actual == expected
288+
289+
238290
@pytest.fixture
def sparse_matrix_serializer():
    """Return a new ``SparseMatrixSerializer`` instance for use as a test fixture."""
    serializer = SparseMatrixSerializer()
    return serializer

0 commit comments

Comments
 (0)