@@ -224,39 +224,34 @@ class NoneSplitter(Splitter):
224
224
# non-utf8 characters.
225
225
# Byte values treated as text: the control characters 7, 8, 9, 10, 12, 13
# and 27, plus every byte from 0x20 through 0xFF except 0x7F.
_textchars = bytearray(
    {7, 8, 9, 10, 12, 13, 27} | (set(range(0x20, 0x100)) - {0x7F})
)
226
226
227
def split(self, filename):
    """Split a file into records using a specific strategy.

    For this NoneSplitter there is no actual split happening and the file
    is returned as a whole. Content that passes the text check and is
    valid UTF-8 is returned as ``str``; everything else is returned as
    the raw ``bytes``.

    Args:
        filename (str): path to the file to split

    Returns: generator for the individual records that were split from
        the file (a single record holding the whole file content)
    """
    with open(filename, "rb") as f:
        buf = f.read()
    # The handle is closed before yielding so that a suspended generator
    # does not keep the file open.
    if not self._is_binary(buf):
        try:
            buf = buf.decode()
        except UnicodeDecodeError:
            # The text check accepts bytes in 0x80-0xFF, so content in a
            # legacy single-byte encoding can pass it without being valid
            # UTF-8. Return such content untouched instead of failing.
            pass
    yield buf
244
244
245
245
def _is_binary(self, buf):
    """Check whether `buf` contains binary data.

    Every byte listed in `self._textchars` is stripped from `buf`; the
    data counts as binary when anything is left over.

    Args:
        buf (bytes): data to inspect

    Returns:
        True if data is binary, otherwise False
    """
    leftover = buf.translate(None, self._textchars)
    return len(leftover) > 0
262
257
0 commit comments