Skip to content

Commit 2644cbd

Browse files
authored
Use Cython's cast for converting encoding and errors (#279)
It is a little faster on Python 3 because we can skip creating a temporary bytes object.
1 parent 3510239 commit 2644cbd

File tree

2 files changed

+32
-53
lines changed

2 files changed

+32
-53
lines changed

msgpack/_packer.pyx

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# coding: utf-8
2-
#cython: embedsignature=True
2+
#cython: embedsignature=True, c_string_encoding=ascii
33

44
from cpython cimport *
5+
from cpython.version cimport PY_MAJOR_VERSION
56
from cpython.exc cimport PyErr_WarnEx
67

78
from msgpack.exceptions import PackValueError, PackOverflowError
@@ -99,8 +100,8 @@ cdef class Packer(object):
99100
cdef object _default
100101
cdef object _bencoding
101102
cdef object _berrors
102-
cdef char *encoding
103-
cdef char *unicode_errors
103+
cdef const char *encoding
104+
cdef const char *unicode_errors
104105
cdef bint strict_types
105106
cdef bool use_float
106107
cdef bint autoreset
@@ -126,26 +127,21 @@ cdef class Packer(object):
126127
if not PyCallable_Check(default):
127128
raise TypeError("default must be a callable.")
128129
self._default = default
129-
if encoding is None and unicode_errors is None:
130-
self.encoding = NULL
131-
self.unicode_errors = NULL
132-
else:
133-
if encoding is None:
130+
131+
self._bencoding = encoding
132+
if encoding is None:
133+
if PY_MAJOR_VERSION < 3:
134134
self.encoding = 'utf-8'
135135
else:
136-
if isinstance(encoding, unicode):
137-
self._bencoding = encoding.encode('ascii')
138-
else:
139-
self._bencoding = encoding
140-
self.encoding = PyBytes_AsString(self._bencoding)
141-
if unicode_errors is None:
142-
self.unicode_errors = 'strict'
143-
else:
144-
if isinstance(unicode_errors, unicode):
145-
self._berrors = unicode_errors.encode('ascii')
146-
else:
147-
self._berrors = unicode_errors
148-
self.unicode_errors = PyBytes_AsString(self._berrors)
136+
self.encoding = NULL
137+
else:
138+
self.encoding = self._bencoding
139+
140+
self._berrors = unicode_errors
141+
if unicode_errors is None:
142+
self.unicode_errors = NULL
143+
else:
144+
self.unicode_errors = self._berrors
149145

150146
def __dealloc__(self):
151147
PyMem_Free(self.pk.buf)
@@ -212,7 +208,7 @@ cdef class Packer(object):
212208
if ret == 0:
213209
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
214210
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
215-
if self.encoding == NULL:
211+
if self.encoding == NULL and self.unicode_errors == NULL:
216212
ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
217213
if ret == -2:
218214
raise PackValueError("unicode string is too large")

msgpack/_unpacker.pyx

Lines changed: 14 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# coding: utf-8
2-
#cython: embedsignature=True
2+
#cython: embedsignature=True, c_string_encoding=ascii
33

4+
from cpython.version cimport PY_MAJOR_VERSION
45
from cpython.bytes cimport (
56
PyBytes_AsString,
67
PyBytes_FromStringAndSize,
@@ -75,7 +76,7 @@ cdef inline init_ctx(unpack_context *ctx,
7576
object object_hook, object object_pairs_hook,
7677
object list_hook, object ext_hook,
7778
bint use_list, bint raw,
78-
char* encoding, char* unicode_errors,
79+
const char* encoding, const char* unicode_errors,
7980
Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
8081
Py_ssize_t max_array_len, Py_ssize_t max_map_len,
8182
Py_ssize_t max_ext_len):
@@ -180,24 +181,16 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
180181
cdef Py_buffer view
181182
cdef char* buf = NULL
182183
cdef Py_ssize_t buf_len
183-
cdef char* cenc = NULL
184-
cdef char* cerr = NULL
184+
cdef const char* cenc = NULL
185+
cdef const char* cerr = NULL
185186
cdef int new_protocol = 0
186187

187188
if encoding is not None:
188189
PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1)
189-
if isinstance(encoding, unicode):
190-
encoding = encoding.encode('ascii')
191-
elif not isinstance(encoding, bytes):
192-
raise TypeError("encoding should be bytes or unicode")
193-
cenc = PyBytes_AsString(encoding)
190+
cenc = encoding
194191

195192
if unicode_errors is not None:
196-
if isinstance(unicode_errors, unicode):
197-
unicode_errors = unicode_errors.encode('ascii')
198-
elif not isinstance(unicode_errors, bytes):
199-
raise TypeError("unicode_errors should be bytes or unicode")
200-
cerr = PyBytes_AsString(unicode_errors)
193+
cerr = unicode_errors
201194

202195
get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
203196
try:
@@ -219,7 +212,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
219212

220213

221214
def unpack(object stream, object object_hook=None, object list_hook=None,
222-
bint use_list=1, encoding=None, unicode_errors="strict",
215+
bint use_list=1, encoding=None, unicode_errors=None,
223216
object_pairs_hook=None, ext_hook=ExtType,
224217
Py_ssize_t max_str_len=2147483647, # 2**31-1
225218
Py_ssize_t max_bin_len=2147483647,
@@ -352,8 +345,8 @@ cdef class Unpacker(object):
352345
Py_ssize_t max_array_len=2147483647,
353346
Py_ssize_t max_map_len=2147483647,
354347
Py_ssize_t max_ext_len=2147483647):
355-
cdef char *cenc=NULL,
356-
cdef char *cerr=NULL
348+
cdef const char *cenc=NULL,
349+
cdef const char *cerr=NULL
357350

358351
self.object_hook = object_hook
359352
self.object_pairs_hook = object_pairs_hook
@@ -383,22 +376,12 @@ cdef class Unpacker(object):
383376

384377
if encoding is not None:
385378
PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1)
386-
if isinstance(encoding, unicode):
387-
self.encoding = encoding.encode('ascii')
388-
elif isinstance(encoding, bytes):
389-
self.encoding = encoding
390-
else:
391-
raise TypeError("encoding should be bytes or unicode")
392-
cenc = PyBytes_AsString(self.encoding)
379+
self.encoding = encoding
380+
cenc = encoding
393381

394382
if unicode_errors is not None:
395-
if isinstance(unicode_errors, unicode):
396-
self.unicode_errors = unicode_errors.encode('ascii')
397-
elif isinstance(unicode_errors, bytes):
398-
self.unicode_errors = unicode_errors
399-
else:
400-
raise TypeError("unicode_errors should be bytes or unicode")
401-
cerr = PyBytes_AsString(self.unicode_errors)
383+
self.unicode_errors = unicode_errors
384+
cerr = unicode_errors
402385

403386
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
404387
ext_hook, use_list, raw, cenc, cerr,

0 commit comments

Comments
 (0)