Skip to content

Commit 428c981

Browse files
authored
gh-112962: in dis module, put cache information in the Instruction instead of creating fake Instructions to represent it (#113016)
1 parent 3531ea4 commit 428c981

File tree

7 files changed

+114
-46
lines changed

7 files changed

+114
-46
lines changed

Doc/library/dis.rst

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -328,13 +328,17 @@ operation is being performed, so the intermediate analysis object isn't useful:
328328
source line information (if any) is taken directly from the disassembled code
329329
object.
330330

331-
The *show_caches* and *adaptive* parameters work as they do in :func:`dis`.
331+
The *adaptive* parameter works as it does in :func:`dis`.
332332

333333
.. versionadded:: 3.4
334334

335335
.. versionchanged:: 3.11
336336
Added the *show_caches* and *adaptive* parameters.
337337

338+
.. versionchanged:: 3.13
339+
The *show_caches* parameter is deprecated and has no effect. The *cache_info*
340+
field of each instruction is populated regardless of its value.
341+
338342

339343
.. function:: findlinestarts(code)
340344

@@ -482,6 +486,14 @@ details of bytecode instructions as :class:`Instruction` instances:
482486
:class:`dis.Positions` object holding the
483487
start and end locations that are covered by this instruction.
484488

489+
.. data:: cache_info
490+
491+
Information about the cache entries of this instruction, as
492+
triplets of the form ``(name, size, data)``, where the ``name``
493+
and ``size`` describe the cache format and ``data`` is the contents
494+
of the cache. ``cache_info`` is ``None`` if the instruction does not have
495+
caches.
496+
485497
.. versionadded:: 3.4
486498

487499
.. versionchanged:: 3.11
@@ -493,8 +505,8 @@ details of bytecode instructions as :class:`Instruction` instances:
493505
Changed field ``starts_line``.
494506

495507
Added fields ``start_offset``, ``cache_offset``, ``end_offset``,
496-
``baseopname``, ``baseopcode``, ``jump_target``, ``oparg``, and
497-
``line_number``.
508+
``baseopname``, ``baseopcode``, ``jump_target``, ``oparg``,
509+
``line_number`` and ``cache_info``.
498510

499511

500512
.. class:: Positions

Lib/dis.py

Lines changed: 43 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -267,9 +267,10 @@ def show_code(co, *, file=None):
267267
'starts_line',
268268
'line_number',
269269
'label',
270-
'positions'
270+
'positions',
271+
'cache_info',
271272
],
272-
defaults=[None, None]
273+
defaults=[None, None, None]
273274
)
274275

275276
_Instruction.opname.__doc__ = "Human readable name for operation"
@@ -286,6 +287,7 @@ def show_code(co, *, file=None):
286287
_Instruction.line_number.__doc__ = "source line number associated with this opcode (if any), otherwise None"
287288
_Instruction.label.__doc__ = "A label (int > 0) if this instruction is a jump target, otherwise None"
288289
_Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction"
290+
_Instruction.cache_info.__doc__ = "list of (name, size, data), one for each cache entry of the instruction"
289291

290292
_ExceptionTableEntryBase = collections.namedtuple("_ExceptionTableEntryBase",
291293
"start end target depth lasti")
@@ -334,6 +336,8 @@ class Instruction(_Instruction):
334336
label - A label if this instruction is a jump target, otherwise None
335337
positions - Optional dis.Positions object holding the span of source code
336338
covered by this instruction
339+
cache_info - information about the format and content of the instruction's cache
340+
entries (if any)
337341
"""
338342

339343
@property
@@ -570,7 +574,6 @@ def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False):
570574
linestarts=linestarts,
571575
line_offset=line_offset,
572576
co_positions=co.co_positions(),
573-
show_caches=show_caches,
574577
original_code=original_code,
575578
arg_resolver=arg_resolver)
576579

@@ -645,8 +648,7 @@ def _is_backward_jump(op):
645648
'ENTER_EXECUTOR')
646649

647650
def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=None,
648-
show_caches=False, original_code=None, labels_map=None,
649-
arg_resolver=None):
651+
original_code=None, labels_map=None, arg_resolver=None):
650652
"""Iterate over the instructions in a bytecode string.
651653
652654
Generates a sequence of Instruction namedtuples giving the details of each
@@ -682,32 +684,28 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N
682684
else:
683685
argval, argrepr = arg, repr(arg)
684686

687+
instr = Instruction(_all_opname[op], op, arg, argval, argrepr,
688+
offset, start_offset, starts_line, line_number,
689+
labels_map.get(offset, None), positions)
690+
691+
caches = _get_cache_size(_all_opname[deop])
692+
# Advance the co_positions iterator:
693+
for _ in range(caches):
694+
next(co_positions, ())
695+
696+
if caches:
697+
cache_info = []
698+
for name, size in _cache_format[opname[deop]].items():
699+
data = code[offset + 2: offset + 2 + 2 * size]
700+
cache_info.append((name, size, data))
701+
else:
702+
cache_info = None
703+
685704
yield Instruction(_all_opname[op], op, arg, argval, argrepr,
686705
offset, start_offset, starts_line, line_number,
687-
labels_map.get(offset, None), positions)
706+
labels_map.get(offset, None), positions, cache_info)
707+
688708

689-
caches = _get_cache_size(_all_opname[deop])
690-
if not caches:
691-
continue
692-
if not show_caches:
693-
# We still need to advance the co_positions iterator:
694-
for _ in range(caches):
695-
next(co_positions, ())
696-
continue
697-
for name, size in _cache_format[opname[deop]].items():
698-
for i in range(size):
699-
offset += 2
700-
# Only show the fancy argrepr for a CACHE instruction when it's
701-
# the first entry for a particular cache value:
702-
if i == 0:
703-
data = code[offset: offset + 2 * size]
704-
argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
705-
else:
706-
argrepr = ""
707-
yield Instruction(
708-
"CACHE", CACHE, 0, None, argrepr, offset, offset, False, None, None,
709-
Positions(*next(co_positions, ()))
710-
)
711709

712710
def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False,
713711
show_offsets=False):
@@ -787,7 +785,6 @@ def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None,
787785
instrs = _get_instructions_bytes(code, linestarts=linestarts,
788786
line_offset=line_offset,
789787
co_positions=co_positions,
790-
show_caches=show_caches,
791788
original_code=original_code,
792789
labels_map=labels_map,
793790
arg_resolver=arg_resolver)
@@ -805,6 +802,23 @@ def print_instructions(instrs, exception_entries, formatter, show_caches=False,
805802
is_current_instr = instr.offset <= lasti \
806803
<= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)])
807804
formatter.print_instruction(instr, is_current_instr)
805+
deop = _deoptop(instr.opcode)
806+
if show_caches and instr.cache_info:
807+
offset = instr.offset
808+
for name, size, data in instr.cache_info:
809+
for i in range(size):
810+
offset += 2
811+
# Only show the fancy argrepr for a CACHE instruction when it's
812+
# the first entry for a particular cache value:
813+
if i == 0:
814+
argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
815+
else:
816+
argrepr = ""
817+
formatter.print_instruction(
818+
Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset,
819+
False, None, None, instr.positions),
820+
is_current_instr)
821+
808822
formatter.print_exception_table(exception_entries)
809823

810824
def _disassemble_str(source, **kwargs):
@@ -952,7 +966,6 @@ def __iter__(self):
952966
linestarts=self._linestarts,
953967
line_offset=self._line_offset,
954968
co_positions=co.co_positions(),
955-
show_caches=self.show_caches,
956969
original_code=original_code,
957970
labels_map=labels_map,
958971
arg_resolver=arg_resolver)

Lib/test/support/bytecode_helper.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,18 @@
77

88
_UNSPECIFIED = object()
99

10+
def instructions_with_positions(instrs, co_positions):
11+
# Return (instr, positions) pairs from the instrs list and co_positions
12+
# iterator. The latter contains items for cache lines and the former
13+
# doesn't, so those need to be skipped.
14+
15+
co_positions = co_positions or iter(())
16+
for instr in instrs:
17+
yield instr, next(co_positions, ())
18+
for _, size, _ in (instr.cache_info or ()):
19+
for i in range(size):
20+
next(co_positions, ())
21+
1022
class BytecodeTestCase(unittest.TestCase):
1123
"""Custom assertion methods for inspecting bytecode."""
1224

Lib/test/test_code.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@
144144
gc_collect)
145145
from test.support.script_helper import assert_python_ok
146146
from test.support import threading_helper
147+
from test.support.bytecode_helper import (BytecodeTestCase,
148+
instructions_with_positions)
147149
from opcode import opmap, opname
148150
COPY_FREE_VARS = opmap['COPY_FREE_VARS']
149151

@@ -384,10 +386,8 @@ def test_co_positions_artificial_instructions(self):
384386
code = traceback.tb_frame.f_code
385387

386388
artificial_instructions = []
387-
for instr, positions in zip(
388-
dis.get_instructions(code, show_caches=True),
389-
code.co_positions(),
390-
strict=True
389+
for instr, positions in instructions_with_positions(
390+
dis.get_instructions(code), code.co_positions()
391391
):
392392
# If any of the positions is None, then all have to
393393
# be None as well for the case above. There are still

Lib/test/test_compile.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from test import support
1313
from test.support import (script_helper, requires_debug_ranges,
1414
requires_specialization, Py_C_RECURSION_LIMIT)
15+
from test.support.bytecode_helper import instructions_with_positions
1516
from test.support.os_helper import FakePath
1617

1718
class TestSpecifics(unittest.TestCase):
@@ -1346,8 +1347,8 @@ def generic_visit(self, node):
13461347
def assertOpcodeSourcePositionIs(self, code, opcode,
13471348
line, end_line, column, end_column, occurrence=1):
13481349

1349-
for instr, position in zip(
1350-
dis.Bytecode(code, show_caches=True), code.co_positions(), strict=True
1350+
for instr, position in instructions_with_positions(
1351+
dis.Bytecode(code), code.co_positions()
13511352
):
13521353
if instr.opname == opcode:
13531354
occurrence -= 1

Lib/test/test_dis.py

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import opcode
1515

16+
CACHE = dis.opmap["CACHE"]
1617

1718
def get_tb():
1819
def _error():
@@ -1227,9 +1228,9 @@ def f():
12271228
else:
12281229
# "copy" the code to un-quicken it:
12291230
f.__code__ = f.__code__.replace()
1230-
for instruction in dis.get_instructions(
1231+
for instruction in _unroll_caches_as_Instructions(dis.get_instructions(
12311232
f, show_caches=True, adaptive=adaptive
1232-
):
1233+
), show_caches=True):
12331234
if instruction.opname == "CACHE":
12341235
yield instruction.argrepr
12351236

@@ -1262,7 +1263,8 @@ def f():
12621263
# However, this might change in the future. So we explicitly try to find
12631264
# a CACHE entry in the instructions. If we can't do that, fail the test
12641265

1265-
for inst in dis.get_instructions(f, show_caches=True):
1266+
for inst in _unroll_caches_as_Instructions(
1267+
dis.get_instructions(f, show_caches=True), show_caches=True):
12661268
if inst.opname == "CACHE":
12671269
op_offset = inst.offset - 2
12681270
cache_offset = inst.offset
@@ -1775,8 +1777,8 @@ def simple(): pass
17751777
class InstructionTestCase(BytecodeTestCase):
17761778

17771779
def assertInstructionsEqual(self, instrs_1, instrs_2, /):
1778-
instrs_1 = [instr_1._replace(positions=None) for instr_1 in instrs_1]
1779-
instrs_2 = [instr_2._replace(positions=None) for instr_2 in instrs_2]
1780+
instrs_1 = [instr_1._replace(positions=None, cache_info=None) for instr_1 in instrs_1]
1781+
instrs_2 = [instr_2._replace(positions=None, cache_info=None) for instr_2 in instrs_2]
17801782
self.assertEqual(instrs_1, instrs_2)
17811783

17821784
class InstructionTests(InstructionTestCase):
@@ -1890,9 +1892,9 @@ def roots(a, b, c):
18901892
instruction.positions.col_offset,
18911893
instruction.positions.end_col_offset,
18921894
)
1893-
for instruction in dis.get_instructions(
1895+
for instruction in _unroll_caches_as_Instructions(dis.get_instructions(
18941896
code, adaptive=adaptive, show_caches=show_caches
1895-
)
1897+
), show_caches=show_caches)
18961898
]
18971899
self.assertEqual(co_positions, dis_positions)
18981900

@@ -2233,6 +2235,31 @@ def get_disassembly(self, tb):
22332235
dis.distb(tb, file=output)
22342236
return output.getvalue()
22352237

2238+
def _unroll_caches_as_Instructions(instrs, show_caches=False):
2239+
# Cache entries are no longer reported by dis as fake instructions,
2240+
# but some tests assume that they are. We should rewrite the tests to assume
2241+
# the new API, but it will be clearer to keep the tests working as
2242+
# before and do that in a separate PR.
2243+
2244+
for instr in instrs:
2245+
yield instr
2246+
if not show_caches:
2247+
continue
2248+
2249+
offset = instr.offset
2250+
for name, size, data in (instr.cache_info or ()):
2251+
for i in range(size):
2252+
offset += 2
2253+
# Only show the fancy argrepr for a CACHE instruction when it's
2254+
# the first entry for a particular cache value:
2255+
if i == 0:
2256+
argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
2257+
else:
2258+
argrepr = ""
2259+
2260+
yield Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset,
2261+
False, None, None, instr.positions)
2262+
22362263

22372264
if __name__ == "__main__":
22382265
unittest.main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:mod:`dis` module functions add cache information to the
2+
:class:`~dis.Instruction` instance rather than creating fake
3+
:class:`~dis.Instruction` instances to represent the cache entries.

0 commit comments

Comments
 (0)