Skip to content

Commit 3df74da

Browse files
committed
apply review. fix docs.
1 parent 1caa19e commit 3df74da

File tree

1 file changed

+7
-12
lines changed

1 file changed

+7
-12
lines changed

src/sagemaker/local/data.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -224,39 +224,34 @@ class NoneSplitter(Splitter):
224224
# non-utf8 characters.
225225
_textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F})
226226

227-
def split(self, file):
227+
def split(self, filename):
228228
"""Split a file into records using a specific strategy.
229229
230230
For this NoneSplitter there is no actual split happening and the file
231231
is returned as a whole.
232232
233233
Args:
234-
file (str): path to the file to split
234+
filename (str): path to the file to split
235235
236236
Returns: generator for the individual records that were split from
237237
the file
238238
"""
239-
with open(file, "rb") as f:
239+
with open(filename, "rb") as f:
240240
buf = f.read()
241241
if not self._is_binary(buf):
242242
buf = buf.decode()
243243
yield buf
244244

245245
def _is_binary(self, buf):
246246
"""binary check.
247-
248-
binary or text check.
249-
250-
binary charactor is below.(non-utf8)
251-
0x07-0x0a, 0x0c-0x0d, 0x20-0x7e, 0x80-0xFF
252-
253-
if there are include, it's binary judgment.
247+
Check whether `buf` contains binary data.
248+
Returns True if `buf` contains any byte outside the text-character set in `_textchars`.
254249
255250
Args:
256-
buf (bytes): bytes in target file.
251+
buf (bytes): data to inspect
257252
258253
Returns:
259-
is binary(True) or text(False).
254+
True if data is binary, otherwise False
260255
"""
261256
return bool(buf.translate(None, self._textchars))
262257

0 commit comments

Comments
 (0)