Skip to content

bpo-22385: Support output separators in hex methods. #13578

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
May 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions Doc/library/binascii.rst
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ The :mod:`binascii` module defines the following functions:
platforms, use ``crc32(data) & 0xffffffff``.


.. function:: b2a_hex(data)
hexlify(data)
.. function:: b2a_hex(data[, sep[, bytes_per_sep=1]])
hexlify(data[, sep[, bytes_per_sep=1]])

Return the hexadecimal representation of the binary *data*. Every byte of
*data* is converted into the corresponding 2-digit hex representation. The
Expand All @@ -155,6 +155,24 @@ The :mod:`binascii` module defines the following functions:
Similar functionality (but returning a text string) is also conveniently
accessible using the :meth:`bytes.hex` method.

If *sep* is specified, it must be a single-character str or bytes object.
It will be inserted in the output after every *bytes_per_sep* input bytes.
Separator placement is counted from the right end of the output by default;
if you wish to count from the left, supply a negative *bytes_per_sep* value.

>>> import binascii
>>> binascii.b2a_hex(b'\xb9\x01\xef')
b'b901ef'
>>> binascii.hexlify(b'\xb9\x01\xef', '-')
b'b9-01-ef'
>>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
b'b9_01ef'
>>> binascii.b2a_hex(b'\xb9\x01\xef', b' ', -2)
b'b901 ef'

.. versionchanged:: 3.8
The *sep* and *bytes_per_sep* parameters were added.

.. function:: a2b_hex(hexstr)
unhexlify(hexstr)

Expand Down
18 changes: 18 additions & 0 deletions Doc/library/stdtypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2404,8 +2404,26 @@ data and are closely related to string objects in a variety of other ways.
>>> b'\xf0\xf1\xf2'.hex()
'f0f1f2'

If you want to make the hex string easier to read, you can specify a
single character separator *sep* parameter to include in the output.
By default, this separator will be included between each byte. A second
optional *bytes_per_sep* parameter controls the spacing. Positive values
calculate the separator position from the right, negative values from
the left.

>>> value = b'\xf0\xf1\xf2'
>>> value.hex('-')
'f0-f1-f2'
>>> value.hex('_', 2)
'f0_f1f2'
>>> b'UUDDLRLRAB'.hex(' ', -4)
'55554444 4c524c52 4142'

.. versionadded:: 3.5

.. versionchanged:: 3.8
:meth:`bytes.hex` now supports optional *sep* and *bytes_per_sep*
parameters to insert separators between bytes in the hex output.

Since bytes objects are sequences of integers (akin to a tuple), for a bytes
object *b*, ``b[0]`` will be an integer, while ``b[0:1]`` will be a bytes
object of length 1. (This contrasts with text strings, where both indexing
Expand Down
3 changes: 3 additions & 0 deletions Include/pystrhex.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ extern "C" {
PyAPI_FUNC(PyObject*) _Py_strhex(const char* argbuf, const Py_ssize_t arglen);
/* Returns a bytes() containing the ASCII hex representation of argbuf. */
PyAPI_FUNC(PyObject*) _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen);
/* These variants include support for a separator between every N bytes: */
PyAPI_FUNC(PyObject*) _Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group);
PyAPI_FUNC(PyObject*) _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group);
#endif /* !Py_LIMITED_API */

#ifdef __cplusplus
Expand Down
12 changes: 12 additions & 0 deletions Lib/test/test_binascii.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,18 @@ def test_hex(self):
self.assertEqual(binascii.hexlify(self.type2test(s)), t)
self.assertEqual(binascii.unhexlify(self.type2test(t)), u)

def test_hex_separator(self):
    """Check that hexlify/b2a_hex yield the bytes form of bytes.hex output."""
    # Separator placement rules are verified in test_bytes.py.  The goal
    # here is only to confirm that argument parsing works and to exercise
    # the direct-to-bytes code path within pystrhex.c.
    payload = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000'

    # No separator at all.
    plain = binascii.hexlify(self.type2test(payload))
    self.assertEqual(plain, payload.hex().encode('ascii'))

    # Explicit separator and group size through hexlify().
    grouped = binascii.hexlify(self.type2test(payload), '.', 8)
    self.assertEqual(grouped, payload.hex('.', 8).encode('ascii'))

    # Separator only (default group size) through the b2a_hex() alias.
    per_byte = binascii.b2a_hex(self.type2test(payload), ':')
    self.assertEqual(per_byte, payload.hex(':').encode('ascii'))

def test_qp(self):
type2test = self.type2test
a2b_qp = binascii.a2b_qp
Expand Down
57 changes: 57 additions & 0 deletions Lib/test/test_bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,63 @@ def test_hex(self):
self.assertEqual(self.type2test(b"\x1a\x2b\x30").hex(), '1a2b30')
self.assertEqual(memoryview(b"\x1a\x2b\x30").hex(), '1a2b30')

def test_hex_separator_basics(self):
    """Exercise separator validation and basic placement for hex(sep, n).

    Covers separators that must be rejected (empty, multi-character,
    None, or outside the 7-bit range), boundary separators that must be
    accepted (NUL and DEL, given as str or bytes), group sizes that
    suppress the separator entirely, and right-anchored (positive) versus
    left-anchored (negative) grouping.
    """
    three_bytes = self.type2test(b'\xb9\x01\xef')
    self.assertEqual(three_bytes.hex(), 'b901ef')

    # The separator must be exactly one character long.
    with self.assertRaises(ValueError):
        three_bytes.hex('')
    with self.assertRaises(ValueError):
        three_bytes.hex('xx')

    # None is not a valid separator, even when the group size is 0.
    with self.assertRaises(TypeError):
        three_bytes.hex(None, 0)

    # Separators above 0x7f are rejected for both str and bytes.
    with self.assertRaises(ValueError):
        three_bytes.hex('\xff')
    with self.assertRaises(ValueError):
        three_bytes.hex(b'\xff')
    with self.assertRaises(ValueError):
        three_bytes.hex(b'\x80')
    with self.assertRaises(ValueError):
        three_bytes.hex(chr(0x100))

    # The extremes of the accepted range work as str and as bytes.
    self.assertEqual(three_bytes.hex(b'\x00'), 'b9\x0001\x00ef')
    self.assertEqual(three_bytes.hex('\x00'), 'b9\x0001\x00ef')
    self.assertEqual(three_bytes.hex(b'\x7f'), 'b9\x7f01\x7fef')
    self.assertEqual(three_bytes.hex('\x7f'), 'b9\x7f01\x7fef')

    # A group size of 0, or one at least as long as the data, emits no
    # separator.
    self.assertEqual(three_bytes.hex(':', 0), 'b901ef')
    self.assertEqual(three_bytes.hex(':', 3), 'b901ef')
    self.assertEqual(three_bytes.hex(':', 4), 'b901ef')
    self.assertEqual(three_bytes.hex(':', -4), 'b901ef')

    # Default and explicit one-byte grouping.
    self.assertEqual(three_bytes.hex(':'), 'b9:01:ef')
    self.assertEqual(three_bytes.hex(b'$'), 'b9$01$ef')
    self.assertEqual(three_bytes.hex(':', 1), 'b9:01:ef')
    self.assertEqual(three_bytes.hex(':', -1), 'b9:01:ef')

    # Positive sizes group from the right, negative sizes from the left.
    self.assertEqual(three_bytes.hex(':', 2), 'b9:01ef')
    self.assertEqual(three_bytes.hex('*', -2), 'b901*ef')

    # Longer input, wrapped in type2test so the type under test is the one
    # actually being exercised.
    value = self.type2test(
        b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000')
    self.assertEqual(value.hex('.', 8), '7b7305000000776f.726c646902000000.730500000068656c.6c6f690100000030')

def test_hex_separator_five_bytes(self):
    """Check separator placement on an odd-length (five byte) input.

    With an odd number of bytes the short leftover group lands on the
    left for positive group sizes (counted from the right) and on the
    right for negative group sizes (counted from the left).
    """
    five_bytes = self.type2test(range(90,95))
    self.assertEqual(five_bytes.hex(), '5a5b5c5d5e')

    # Positive group sizes: groups are counted from the right end.
    self.assertEqual(five_bytes.hex('.', 1), '5a.5b.5c.5d.5e')
    self.assertEqual(five_bytes.hex(' ', 2), '5a 5b5c 5d5e')
    self.assertEqual(five_bytes.hex('-', 3), '5a5b-5c5d5e')
    self.assertEqual(five_bytes.hex(':', 4), '5a:5b5c5d5e')
    self.assertEqual(five_bytes.hex(':', 5), '5a5b5c5d5e')

    # Negative group sizes: groups are counted from the left end.
    self.assertEqual(five_bytes.hex('_', -2), '5a5b_5c5d_5e')
    self.assertEqual(five_bytes.hex('_', -3), '5a5b5c_5d5e')
    self.assertEqual(five_bytes.hex(':', -4), '5a5b5c5d:5e')
    self.assertEqual(five_bytes.hex(':', -5), '5a5b5c5d5e')

def test_hex_separator_six_bytes(self):
    """Check separator placement on a six byte input for many group sizes."""
    data = self.type2test(x*3 for x in range(1, 7))
    self.assertEqual(data.hex(), '0306090c0f12')

    # (separator, bytes_per_sep, expected hex string)
    cases = [
        ('.', 1, '03.06.09.0c.0f.12'),
        (' ', 2, '0306 090c 0f12'),
        ('-', 3, '030609-0c0f12'),
        (':', 4, '0306:090c0f12'),
        (':', 5, '03:06090c0f12'),
        (':', 6, '0306090c0f12'),
        (':', 95, '0306090c0f12'),
        ('_', -3, '030609_0c0f12'),
        (':', -4, '0306090c:0f12'),
        (b'@', -5, '0306090c0f@12'),
        (':', -6, '0306090c0f12'),
        (' ', -95, '0306090c0f12'),
    ]
    for separator, group_size, expected in cases:
        self.assertEqual(data.hex(separator, group_size), expected)

def test_join(self):
self.assertEqual(self.type2test(b"").join([]), b"")
self.assertEqual(self.type2test(b"").join([b""]), b"")
Expand Down
5 changes: 4 additions & 1 deletion Lib/test/test_doctest.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,18 +665,21 @@ def non_Python_modules(): r"""
True
>>> real_tests = [t for t in tests if len(t.examples) > 0]
>>> len(real_tests) # objects that actually have doctests
9
12
>>> for t in real_tests:
... print('{} {}'.format(len(t.examples), t.name))
...
1 builtins.bin
5 builtins.bytearray.hex
5 builtins.bytes.hex
3 builtins.float.as_integer_ratio
2 builtins.float.fromhex
2 builtins.float.hex
1 builtins.hex
1 builtins.int
3 builtins.int.as_integer_ratio
2 builtins.int.bit_length
5 builtins.memoryview.hex
1 builtins.oct

Note here that 'bin', 'oct', and 'hex' are functions; 'float.as_integer_ratio',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
The `bytes.hex`, `bytearray.hex`, and `memoryview.hex` methods as well as
the `binascii.hexlify` and `b2a_hex` functions now have the ability to
include an optional separator between hex bytes. This functionality was
inspired by MicroPython's hexlify implementation.
33 changes: 25 additions & 8 deletions Modules/binascii.c
Original file line number Diff line number Diff line change
Expand Up @@ -1159,34 +1159,51 @@ binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
binascii.b2a_hex

data: Py_buffer
/
sep: object = NULL
An optional single character or byte to separate hex bytes.
bytes_per_sep: int = 1
How many bytes between separators. Positive values count from the
right, negative values count from the left.

Hexadecimal representation of binary data.

The return value is a bytes object. This function is also
available as "hexlify()".

Example:
>>> binascii.b2a_hex(b'\xb9\x01\xef')
b'b901ef'
>>> binascii.hexlify(b'\xb9\x01\xef', ':')
b'b9:01:ef'
>>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
b'b9_01ef'
[clinic start generated code]*/

static PyObject *
binascii_b2a_hex_impl(PyObject *module, Py_buffer *data)
/*[clinic end generated code: output=92fec1a95c9897a0 input=96423cfa299ff3b1]*/
binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
int bytes_per_sep)
/*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
{
return _Py_strhex_bytes((const char *)data->buf, data->len);
return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
sep, bytes_per_sep);
}

/*[clinic input]
binascii.hexlify = binascii.b2a_hex

Hexadecimal representation of binary data.

The return value is a bytes object.
The return value is a bytes object. This function is also
available as "b2a_hex()".
[clinic start generated code]*/

static PyObject *
binascii_hexlify_impl(PyObject *module, Py_buffer *data)
/*[clinic end generated code: output=749e95e53c14880c input=2e3afae7f083f061]*/
binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
int bytes_per_sep)
/*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
{
return _Py_strhex_bytes((const char *)data->buf, data->len);
return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
sep, bytes_per_sep);
}

/*[clinic input]
Expand Down
Loading