Skip to content

Commit a323330

Browse files
committed
fix binary check.
1 parent 0e084d8 commit a323330

File tree

1 file changed

+19
-2
lines changed

1 file changed

+19
-2
lines changed

src/sagemaker/local/data.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,25 @@ def split(self, file):
233233
Returns: generator for the individual records that were split from
234234
the file
235235
"""
236-
with open(file, "r+b") as f:
237-
yield f.read()
236+
with open(file, "rb") as f:
237+
buf = f.read()
238+
if not self._is_binary(buf):
239+
buf = buf.decode()
240+
yield buf
241+
242+
def _is_binary(self, buf):
243+
"""binary check.
244+
245+
binary or text check.
246+
247+
Args:
248+
buf (bytes): bytes in target file.
249+
250+
Returns:
251+
is binary(True) or text(False).
252+
"""
253+
textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F})
254+
return bool(buf.translate(None, textchars))
238255

239256

240257
class LineSplitter(Splitter):

0 commit comments

Comments
 (0)