Skip to content

Commit 55ef998

Browse files
authored
gh-112720: Move dis's cache output code to the Formatter, labels lookup to the arg_resolver. Reduce the number of parameters passed around. (#113108)
1 parent 7bb00f0 commit 55ef998

File tree

2 files changed

+104
-81
lines changed

2 files changed

+104
-81
lines changed

Lib/dis.py

Lines changed: 91 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,14 @@ def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False,
113113
elif hasattr(x, 'co_code'): # Code object
114114
_disassemble_recursive(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets)
115115
elif isinstance(x, (bytes, bytearray)): # Raw bytecode
116-
_disassemble_bytes(x, file=file, show_caches=show_caches, show_offsets=show_offsets)
116+
labels_map = _make_labels_map(x)
117+
label_width = 4 + len(str(len(labels_map)))
118+
formatter = Formatter(file=file,
119+
offset_width=len(str(max(len(x) - 2, 9999))) if show_offsets else 0,
120+
label_width=label_width,
121+
show_caches=show_caches)
122+
arg_resolver = ArgResolver(labels_map=labels_map)
123+
_disassemble_bytes(x, arg_resolver=arg_resolver, formatter=formatter)
117124
elif isinstance(x, str): # Source code
118125
_disassemble_str(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets)
119126
else:
@@ -394,23 +401,41 @@ def __str__(self):
394401
class Formatter:
395402

396403
def __init__(self, file=None, lineno_width=0, offset_width=0, label_width=0,
397-
line_offset=0):
404+
line_offset=0, show_caches=False):
398405
"""Create a Formatter
399406
400407
*file* where to write the output
401408
*lineno_width* sets the width of the line number field (0 omits it)
402409
*offset_width* sets the width of the instruction offset field
403410
*label_width* sets the width of the label field
411+
*show_caches* is a boolean indicating whether to display cache lines
404412
405-
*line_offset* the line number (within the code unit)
406413
"""
407414
self.file = file
408415
self.lineno_width = lineno_width
409416
self.offset_width = offset_width
410417
self.label_width = label_width
411-
418+
self.show_caches = show_caches
412419

413420
def print_instruction(self, instr, mark_as_current=False):
421+
self.print_instruction_line(instr, mark_as_current)
422+
if self.show_caches and instr.cache_info:
423+
offset = instr.offset
424+
for name, size, data in instr.cache_info:
425+
for i in range(size):
426+
offset += 2
427+
# Only show the fancy argrepr for a CACHE instruction when it's
428+
# the first entry for a particular cache value:
429+
if i == 0:
430+
argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
431+
else:
432+
argrepr = ""
433+
self.print_instruction_line(
434+
Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset,
435+
False, None, None, instr.positions),
436+
False)
437+
438+
def print_instruction_line(self, instr, mark_as_current):
414439
"""Format instruction details for inclusion in disassembly output."""
415440
lineno_width = self.lineno_width
416441
offset_width = self.offset_width
@@ -474,11 +499,14 @@ def print_exception_table(self, exception_entries):
474499

475500

476501
class ArgResolver:
477-
def __init__(self, co_consts, names, varname_from_oparg, labels_map):
502+
def __init__(self, co_consts=None, names=None, varname_from_oparg=None, labels_map=None):
478503
self.co_consts = co_consts
479504
self.names = names
480505
self.varname_from_oparg = varname_from_oparg
481-
self.labels_map = labels_map
506+
self.labels_map = labels_map or {}
507+
508+
def get_label_for_offset(self, offset):
509+
return self.labels_map.get(offset, None)
482510

483511
def get_argval_argrepr(self, op, arg, offset):
484512
get_name = None if self.names is None else self.names.__getitem__
@@ -547,8 +575,7 @@ def get_argval_argrepr(self, op, arg, offset):
547575
argrepr = _intrinsic_2_descs[arg]
548576
return argval, argrepr
549577

550-
551-
def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False):
578+
def get_instructions(x, *, first_line=None, show_caches=None, adaptive=False):
552579
"""Iterator for the opcodes in methods, functions or code
553580
554581
Generates a series of Instruction named tuples giving the details of
@@ -567,9 +594,10 @@ def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False):
567594
line_offset = 0
568595

569596
original_code = co.co_code
570-
labels_map = _make_labels_map(original_code)
571-
arg_resolver = ArgResolver(co.co_consts, co.co_names, co._varname_from_oparg,
572-
labels_map)
597+
arg_resolver = ArgResolver(co_consts=co.co_consts,
598+
names=co.co_names,
599+
varname_from_oparg=co._varname_from_oparg,
600+
labels_map=_make_labels_map(original_code))
573601
return _get_instructions_bytes(_get_code_array(co, adaptive),
574602
linestarts=linestarts,
575603
line_offset=line_offset,
@@ -648,7 +676,7 @@ def _is_backward_jump(op):
648676
'ENTER_EXECUTOR')
649677

650678
def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=None,
651-
original_code=None, labels_map=None, arg_resolver=None):
679+
original_code=None, arg_resolver=None):
652680
"""Iterate over the instructions in a bytecode string.
653681
654682
Generates a sequence of Instruction namedtuples giving the details of each
@@ -661,8 +689,6 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N
661689
original_code = original_code or code
662690
co_positions = co_positions or iter(())
663691

664-
labels_map = labels_map or _make_labels_map(original_code)
665-
666692
starts_line = False
667693
local_line_number = None
668694
line_number = None
@@ -684,10 +710,6 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N
684710
else:
685711
argval, argrepr = arg, repr(arg)
686712

687-
instr = Instruction(_all_opname[op], op, arg, argval, argrepr,
688-
offset, start_offset, starts_line, line_number,
689-
labels_map.get(offset, None), positions)
690-
691713
caches = _get_cache_size(_all_opname[deop])
692714
# Advance the co_positions iterator:
693715
for _ in range(caches):
@@ -701,23 +723,31 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N
701723
else:
702724
cache_info = None
703725

726+
label = arg_resolver.get_label_for_offset(offset) if arg_resolver else None
704727
yield Instruction(_all_opname[op], op, arg, argval, argrepr,
705728
offset, start_offset, starts_line, line_number,
706-
labels_map.get(offset, None), positions, cache_info)
707-
729+
label, positions, cache_info)
708730

709731

710732
def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False,
711733
show_offsets=False):
712734
"""Disassemble a code object."""
713735
linestarts = dict(findlinestarts(co))
714736
exception_entries = _parse_exception_table(co)
715-
_disassemble_bytes(_get_code_array(co, adaptive),
716-
lasti, co._varname_from_oparg,
717-
co.co_names, co.co_consts, linestarts, file=file,
718-
exception_entries=exception_entries,
719-
co_positions=co.co_positions(), show_caches=show_caches,
720-
original_code=co.co_code, show_offsets=show_offsets)
737+
labels_map = _make_labels_map(co.co_code, exception_entries=exception_entries)
738+
label_width = 4 + len(str(len(labels_map)))
739+
formatter = Formatter(file=file,
740+
lineno_width=_get_lineno_width(linestarts),
741+
offset_width=len(str(max(len(co.co_code) - 2, 9999))) if show_offsets else 0,
742+
label_width=label_width,
743+
show_caches=show_caches)
744+
arg_resolver = ArgResolver(co_consts=co.co_consts,
745+
names=co.co_names,
746+
varname_from_oparg=co._varname_from_oparg,
747+
labels_map=labels_map)
748+
_disassemble_bytes(_get_code_array(co, adaptive), lasti, linestarts,
749+
exception_entries=exception_entries, co_positions=co.co_positions(),
750+
original_code=co.co_code, arg_resolver=arg_resolver, formatter=formatter)
721751

722752
def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adaptive=False, show_offsets=False):
723753
disassemble(co, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets)
@@ -764,60 +794,29 @@ def _get_lineno_width(linestarts):
764794
return lineno_width
765795

766796

767-
def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None,
768-
names=None, co_consts=None, linestarts=None,
769-
*, file=None, line_offset=0, exception_entries=(),
770-
co_positions=None, show_caches=False, original_code=None,
771-
show_offsets=False):
772-
773-
offset_width = len(str(max(len(code) - 2, 9999))) if show_offsets else 0
774-
775-
labels_map = _make_labels_map(original_code or code, exception_entries)
776-
label_width = 4 + len(str(len(labels_map)))
797+
def _disassemble_bytes(code, lasti=-1, linestarts=None,
798+
*, line_offset=0, exception_entries=(),
799+
co_positions=None, original_code=None,
800+
arg_resolver=None, formatter=None):
777801

778-
formatter = Formatter(file=file,
779-
lineno_width=_get_lineno_width(linestarts),
780-
offset_width=offset_width,
781-
label_width=label_width,
782-
line_offset=line_offset)
802+
assert formatter is not None
803+
assert arg_resolver is not None
783804

784-
arg_resolver = ArgResolver(co_consts, names, varname_from_oparg, labels_map)
785805
instrs = _get_instructions_bytes(code, linestarts=linestarts,
786806
line_offset=line_offset,
787807
co_positions=co_positions,
788808
original_code=original_code,
789-
labels_map=labels_map,
790809
arg_resolver=arg_resolver)
791810

792-
print_instructions(instrs, exception_entries, formatter,
793-
show_caches=show_caches, lasti=lasti)
811+
print_instructions(instrs, exception_entries, formatter, lasti=lasti)
794812

795813

796-
def print_instructions(instrs, exception_entries, formatter, show_caches=False, lasti=-1):
814+
def print_instructions(instrs, exception_entries, formatter, lasti=-1):
797815
for instr in instrs:
798-
if show_caches:
799-
is_current_instr = instr.offset == lasti
800-
else:
801-
# Each CACHE takes 2 bytes
802-
is_current_instr = instr.offset <= lasti \
803-
<= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)])
816+
# Each CACHE takes 2 bytes
817+
is_current_instr = instr.offset <= lasti \
818+
<= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)])
804819
formatter.print_instruction(instr, is_current_instr)
805-
deop = _deoptop(instr.opcode)
806-
if show_caches and instr.cache_info:
807-
offset = instr.offset
808-
for name, size, data in instr.cache_info:
809-
for i in range(size):
810-
offset += 2
811-
# Only show the fancy argrepr for a CACHE instruction when it's
812-
# the first entry for a particular cache value:
813-
if i == 0:
814-
argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
815-
else:
816-
argrepr = ""
817-
formatter.print_instruction(
818-
Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset,
819-
False, None, None, instr.positions),
820-
is_current_instr)
821820

822821
formatter.print_exception_table(exception_entries)
823822

@@ -960,14 +959,15 @@ def __iter__(self):
960959
co = self.codeobj
961960
original_code = co.co_code
962961
labels_map = _make_labels_map(original_code, self.exception_entries)
963-
arg_resolver = ArgResolver(co.co_consts, co.co_names, co._varname_from_oparg,
964-
labels_map)
962+
arg_resolver = ArgResolver(co_consts=co.co_consts,
963+
names=co.co_names,
964+
varname_from_oparg=co._varname_from_oparg,
965+
labels_map=labels_map)
965966
return _get_instructions_bytes(_get_code_array(co, self.adaptive),
966967
linestarts=self._linestarts,
967968
line_offset=self._line_offset,
968969
co_positions=co.co_positions(),
969970
original_code=original_code,
970-
labels_map=labels_map,
971971
arg_resolver=arg_resolver)
972972

973973
def __repr__(self):
@@ -995,18 +995,32 @@ def dis(self):
995995
else:
996996
offset = -1
997997
with io.StringIO() as output:
998-
_disassemble_bytes(_get_code_array(co, self.adaptive),
999-
varname_from_oparg=co._varname_from_oparg,
1000-
names=co.co_names, co_consts=co.co_consts,
998+
code = _get_code_array(co, self.adaptive)
999+
offset_width = len(str(max(len(code) - 2, 9999))) if self.show_offsets else 0
1000+
1001+
1002+
labels_map = _make_labels_map(co.co_code, self.exception_entries)
1003+
label_width = 4 + len(str(len(labels_map)))
1004+
formatter = Formatter(file=output,
1005+
lineno_width=_get_lineno_width(self._linestarts),
1006+
offset_width=offset_width,
1007+
label_width=label_width,
1008+
line_offset=self._line_offset,
1009+
show_caches=self.show_caches)
1010+
1011+
arg_resolver = ArgResolver(co_consts=co.co_consts,
1012+
names=co.co_names,
1013+
varname_from_oparg=co._varname_from_oparg,
1014+
labels_map=labels_map)
1015+
_disassemble_bytes(code,
10011016
linestarts=self._linestarts,
10021017
line_offset=self._line_offset,
1003-
file=output,
10041018
lasti=offset,
10051019
exception_entries=self.exception_entries,
10061020
co_positions=co.co_positions(),
1007-
show_caches=self.show_caches,
10081021
original_code=co.co_code,
1009-
show_offsets=self.show_offsets)
1022+
arg_resolver=arg_resolver,
1023+
formatter=formatter)
10101024
return output.getvalue()
10111025

10121026

Lib/test/test_dis.py

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
import contextlib
44
import dis
5+
import functools
56
import io
67
import re
78
import sys
@@ -1982,19 +1983,27 @@ def f(opcode, oparg, offset, *init_args):
19821983
self.assertEqual(f(opcode.opmap["BINARY_OP"], 3, *args), (3, '<<'))
19831984
self.assertEqual(f(opcode.opmap["CALL_INTRINSIC_1"], 2, *args), (2, 'INTRINSIC_IMPORT_STAR'))
19841985

1986+
def get_instructions(self, code):
1987+
return dis._get_instructions_bytes(code)
1988+
19851989
def test_start_offset(self):
19861990
# When no extended args are present,
19871991
# start_offset should be equal to offset
1992+
19881993
instructions = list(dis.Bytecode(_f))
19891994
for instruction in instructions:
19901995
self.assertEqual(instruction.offset, instruction.start_offset)
19911996

1997+
def last_item(iterable):
1998+
return functools.reduce(lambda a, b : b, iterable)
1999+
19922000
code = bytes([
19932001
opcode.opmap["LOAD_FAST"], 0x00,
19942002
opcode.opmap["EXTENDED_ARG"], 0x01,
19952003
opcode.opmap["POP_JUMP_IF_TRUE"], 0xFF,
19962004
])
1997-
jump = list(dis._get_instructions_bytes(code))[-1]
2005+
labels_map = dis._make_labels_map(code)
2006+
jump = last_item(self.get_instructions(code))
19982007
self.assertEqual(4, jump.offset)
19992008
self.assertEqual(2, jump.start_offset)
20002009

@@ -2006,7 +2015,7 @@ def test_start_offset(self):
20062015
opcode.opmap["POP_JUMP_IF_TRUE"], 0xFF,
20072016
opcode.opmap["CACHE"], 0x00,
20082017
])
2009-
jump = list(dis._get_instructions_bytes(code))[-1]
2018+
jump = last_item(self.get_instructions(code))
20102019
self.assertEqual(8, jump.offset)
20112020
self.assertEqual(2, jump.start_offset)
20122021

@@ -2021,7 +2030,7 @@ def test_start_offset(self):
20212030
opcode.opmap["POP_JUMP_IF_TRUE"], 0xFF,
20222031
opcode.opmap["CACHE"], 0x00,
20232032
])
2024-
instructions = list(dis._get_instructions_bytes(code))
2033+
instructions = list(self.get_instructions(code))
20252034
# 1st jump
20262035
self.assertEqual(4, instructions[2].offset)
20272036
self.assertEqual(2, instructions[2].start_offset)
@@ -2042,7 +2051,7 @@ def test_cache_offset_and_end_offset(self):
20422051
opcode.opmap["CACHE"], 0x00,
20432052
opcode.opmap["CACHE"], 0x00
20442053
])
2045-
instructions = list(dis._get_instructions_bytes(code))
2054+
instructions = list(self.get_instructions(code))
20462055
self.assertEqual(2, instructions[0].cache_offset)
20472056
self.assertEqual(10, instructions[0].end_offset)
20482057
self.assertEqual(12, instructions[1].cache_offset)

0 commit comments

Comments
 (0)