Skip to content

Commit 5290881

Browse files
authored
gh-106149: move jump target resolution from optimizer to assembler (#106150)
1 parent eaa1eae commit 5290881

File tree

5 files changed

+113
-105
lines changed

5 files changed

+113
-105
lines changed

Include/internal/pycore_compile.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ extern int _PyAST_Optimize(
2828
int ff_features);
2929

3030
typedef struct {
31-
int h_offset;
31+
int h_label;
3232
int h_startdepth;
3333
int h_preserve_lasti;
3434
} _PyCompile_ExceptHandlerInfo;
@@ -38,6 +38,10 @@ typedef struct {
3838
int i_oparg;
3939
_PyCompilerSrcLocation i_loc;
4040
_PyCompile_ExceptHandlerInfo i_except_handler_info;
41+
42+
/* Used by the assembler */
43+
int i_target;
44+
int i_offset;
4145
} _PyCompile_Instruction;
4246

4347
typedef struct {
@@ -85,8 +89,6 @@ int _PyCompile_EnsureArrayLargeEnough(
8589

8690
int _PyCompile_ConstCacheMergeOne(PyObject *const_cache, PyObject **obj);
8791

88-
int _PyCompile_InstrSize(int opcode, int oparg);
89-
9092
/* Access compiler internals for unit testing */
9193

9294
PyAPI_FUNC(PyObject*) _PyCompile_CodeGen(

Include/internal/pycore_flowgraph.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,6 @@ typedef struct _PyCfgBasicblock_ {
5555
int b_predecessors;
5656
/* depth of stack upon entry of block, computed by stackdepth() */
5757
int b_startdepth;
58-
/* instruction offset for block, computed by assemble_jump_offsets() */
59-
int b_offset;
6058
/* Basic block is an exception handler that preserves lasti */
6159
unsigned b_preserve_lasti : 1;
6260
/* Used by compiler passes to mark whether they have visited a basic block. */

Python/assemble.c

Lines changed: 101 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "pycore_code.h" // write_location_entry_start()
55
#include "pycore_compile.h"
66
#include "pycore_opcode.h" // _PyOpcode_Caches[] and opcode category macros
7+
#include "pycore_opcode_utils.h" // IS_BACKWARDS_JUMP_OPCODE
78
#include "opcode_metadata.h" // IS_PSEUDO_INSTR
89

910

@@ -34,6 +35,18 @@ same_location(location a, location b)
3435
a.end_col_offset == b.end_col_offset;
3536
}
3637

38+
static int
39+
instr_size(instruction *instr)
40+
{
41+
int opcode = instr->i_opcode;
42+
int oparg = instr->i_oparg;
43+
assert(!IS_PSEUDO_INSTR(opcode));
44+
assert(OPCODE_HAS_ARG(opcode) || oparg == 0);
45+
int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
46+
int caches = _PyOpcode_Caches[opcode];
47+
return extended_args + 1 + caches;
48+
}
49+
3750
struct assembler {
3851
PyObject *a_bytecode; /* bytes containing bytecode */
3952
int a_offset; /* offset into bytecode */
@@ -118,6 +131,7 @@ assemble_emit_exception_table_item(struct assembler *a, int value, int msb)
118131

119132
static int
120133
assemble_emit_exception_table_entry(struct assembler *a, int start, int end,
134+
int handler_offset,
121135
_PyCompile_ExceptHandlerInfo *handler)
122136
{
123137
Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table);
@@ -126,7 +140,7 @@ assemble_emit_exception_table_entry(struct assembler *a, int start, int end,
126140
}
127141
int size = end-start;
128142
assert(end > start);
129-
int target = handler->h_offset;
143+
int target = handler_offset;
130144
int depth = handler->h_startdepth - 1;
131145
if (handler->h_preserve_lasti > 0) {
132146
depth -= 1;
@@ -145,24 +159,30 @@ assemble_exception_table(struct assembler *a, instr_sequence *instrs)
145159
{
146160
int ioffset = 0;
147161
_PyCompile_ExceptHandlerInfo handler;
148-
handler.h_offset = -1;
162+
handler.h_label = -1;
149163
handler.h_startdepth = -1;
150164
handler.h_preserve_lasti = -1;
151165
int start = -1;
152166
for (int i = 0; i < instrs->s_used; i++) {
153167
instruction *instr = &instrs->s_instrs[i];
154-
if (instr->i_except_handler_info.h_offset != handler.h_offset) {
155-
if (handler.h_offset >= 0) {
168+
if (instr->i_except_handler_info.h_label != handler.h_label) {
169+
if (handler.h_label >= 0) {
170+
int handler_offset = instrs->s_instrs[handler.h_label].i_offset;
156171
RETURN_IF_ERROR(
157-
assemble_emit_exception_table_entry(a, start, ioffset, &handler));
172+
assemble_emit_exception_table_entry(a, start, ioffset,
173+
handler_offset,
174+
&handler));
158175
}
159176
start = ioffset;
160177
handler = instr->i_except_handler_info;
161178
}
162-
ioffset += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
179+
ioffset += instr_size(instr);
163180
}
164-
if (handler.h_offset >= 0) {
165-
RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset, &handler));
181+
if (handler.h_label >= 0) {
182+
int handler_offset = instrs->s_instrs[handler.h_label].i_offset;
183+
RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset,
184+
handler_offset,
185+
&handler));
166186
}
167187
return SUCCESS;
168188
}
@@ -329,7 +349,7 @@ assemble_location_info(struct assembler *a, instr_sequence *instrs,
329349
loc = instr->i_loc;
330350
size = 0;
331351
}
332-
size += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
352+
size += instr_size(instr);
333353
}
334354
RETURN_IF_ERROR(assemble_emit_location(a, loc, size));
335355
return SUCCESS;
@@ -385,7 +405,7 @@ assemble_emit_instr(struct assembler *a, instruction *instr)
385405
Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode);
386406
_Py_CODEUNIT *code;
387407

388-
int size = _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg);
408+
int size = instr_size(instr);
389409
if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) {
390410
if (len > PY_SSIZE_T_MAX / 2) {
391411
return ERROR;
@@ -585,12 +605,83 @@ makecode(_PyCompile_CodeUnitMetadata *umd, struct assembler *a, PyObject *const_
585605
return co;
586606
}
587607

608+
static int
609+
resolve_jump_offsets(instr_sequence *instrs)
610+
{
611+
/* Compute the size of each instruction and fixup jump args.
612+
* Replace instruction index with position in bytecode.
613+
*/
614+
615+
for (int i = 0; i < instrs->s_used; i++) {
616+
instruction *instr = &instrs->s_instrs[i];
617+
if (OPCODE_HAS_JUMP(instr->i_opcode)) {
618+
instr->i_target = instr->i_oparg;
619+
}
620+
}
621+
622+
int extended_arg_recompile;
623+
624+
do {
625+
int totsize = 0;
626+
for (int i = 0; i < instrs->s_used; i++) {
627+
instruction *instr = &instrs->s_instrs[i];
628+
instr->i_offset = totsize;
629+
int isize = instr_size(instr);
630+
totsize += isize;
631+
}
632+
extended_arg_recompile = 0;
633+
634+
int offset = 0;
635+
for (int i = 0; i < instrs->s_used; i++) {
636+
instruction *instr = &instrs->s_instrs[i];
637+
int isize = instr_size(instr);
638+
/* jump offsets are computed relative to
639+
* the instruction pointer after fetching
640+
* the jump instruction.
641+
*/
642+
offset += isize;
643+
if (OPCODE_HAS_JUMP(instr->i_opcode)) {
644+
instruction *target = &instrs->s_instrs[instr->i_target];
645+
instr->i_oparg = target->i_offset;
646+
if (instr->i_oparg < offset) {
647+
assert(IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
648+
instr->i_oparg = offset - instr->i_oparg;
649+
}
650+
else {
651+
assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
652+
instr->i_oparg = instr->i_oparg - offset;
653+
}
654+
if (instr_size(instr) != isize) {
655+
extended_arg_recompile = 1;
656+
}
657+
}
658+
}
659+
/* XXX: This is an awful hack that could hurt performance, but
660+
on the bright side it should work until we come up
661+
with a better solution.
662+
663+
The issue is that in the first loop instr_size() is
664+
called, and it requires i_oparg be set appropriately.
665+
There is a bootstrap problem because i_oparg is
666+
calculated in the second loop above.
667+
668+
So we loop until we stop seeing new EXTENDED_ARGs.
669+
The only EXTENDED_ARGs that could be popping up are
670+
ones in jump instructions. So this should converge
671+
fairly quickly.
672+
*/
673+
} while (extended_arg_recompile);
674+
return SUCCESS;
675+
}
588676

589677
PyCodeObject *
590678
_PyAssemble_MakeCodeObject(_PyCompile_CodeUnitMetadata *umd, PyObject *const_cache,
591679
PyObject *consts, int maxdepth, instr_sequence *instrs,
592680
int nlocalsplus, int code_flags, PyObject *filename)
593681
{
682+
if (resolve_jump_offsets(instrs) < 0) {
683+
return NULL;
684+
}
594685
PyCodeObject *co = NULL;
595686

596687
struct assembler a;

Python/compile.c

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -131,16 +131,6 @@ enum {
131131
};
132132

133133

134-
int
135-
_PyCompile_InstrSize(int opcode, int oparg)
136-
{
137-
assert(!IS_PSEUDO_INSTR(opcode));
138-
assert(OPCODE_HAS_ARG(opcode) || oparg == 0);
139-
int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg);
140-
int caches = _PyOpcode_Caches[opcode];
141-
return extended_args + 1 + caches;
142-
}
143-
144134
typedef _PyCompile_Instruction instruction;
145135
typedef _PyCompile_InstructionSequence instr_sequence;
146136

@@ -7717,17 +7707,20 @@ cfg_to_instr_sequence(cfg_builder *g, instr_sequence *seq)
77177707
RETURN_IF_ERROR(instr_sequence_use_label(seq, b->b_label.id));
77187708
for (int i = 0; i < b->b_iused; i++) {
77197709
cfg_instr *instr = &b->b_instr[i];
7710+
if (OPCODE_HAS_JUMP(instr->i_opcode)) {
7711+
instr->i_oparg = instr->i_target->b_label.id;
7712+
}
77207713
RETURN_IF_ERROR(
77217714
instr_sequence_addop(seq, instr->i_opcode, instr->i_oparg, instr->i_loc));
77227715

77237716
_PyCompile_ExceptHandlerInfo *hi = &seq->s_instrs[seq->s_used-1].i_except_handler_info;
77247717
if (instr->i_except != NULL) {
7725-
hi->h_offset = instr->i_except->b_offset;
7718+
hi->h_label = instr->i_except->b_label.id;
77267719
hi->h_startdepth = instr->i_except->b_startdepth;
77277720
hi->h_preserve_lasti = instr->i_except->b_preserve_lasti;
77287721
}
77297722
else {
7730-
hi->h_offset = -1;
7723+
hi->h_label = -1;
77317724
}
77327725
}
77337726
}

Python/flowgraph.c

Lines changed: 2 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -166,22 +166,6 @@ _PyBasicblock_InsertInstruction(basicblock *block, int pos, cfg_instr *instr) {
166166
return SUCCESS;
167167
}
168168

169-
static int
170-
instr_size(cfg_instr *instruction)
171-
{
172-
return _PyCompile_InstrSize(instruction->i_opcode, instruction->i_oparg);
173-
}
174-
175-
static int
176-
blocksize(basicblock *b)
177-
{
178-
int size = 0;
179-
for (int i = 0; i < b->b_iused; i++) {
180-
size += instr_size(&b->b_instr[i]);
181-
}
182-
return size;
183-
}
184-
185169
/* For debugging purposes only */
186170
#if 0
187171
static void
@@ -212,9 +196,9 @@ static void
212196
dump_basicblock(const basicblock *b)
213197
{
214198
const char *b_return = basicblock_returns(b) ? "return " : "";
215-
fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, offset: %d %s\n",
199+
fprintf(stderr, "%d: [EH=%d CLD=%d WRM=%d NO_FT=%d %p] used: %d, depth: %d, %s\n",
216200
b->b_label.id, b->b_except_handler, b->b_cold, b->b_warm, BB_NO_FALLTHROUGH(b), b, b->b_iused,
217-
b->b_startdepth, b->b_offset, b_return);
201+
b->b_startdepth, b_return);
218202
if (b->b_instr) {
219203
int i;
220204
for (i = 0; i < b->b_iused; i++) {
@@ -480,71 +464,11 @@ normalize_jumps(_PyCfgBuilder *g)
480464
return SUCCESS;
481465
}
482466

483-
static void
484-
resolve_jump_offsets(basicblock *entryblock)
485-
{
486-
int bsize, totsize, extended_arg_recompile;
487-
488-
/* Compute the size of each block and fixup jump args.
489-
Replace block pointer with position in bytecode. */
490-
do {
491-
totsize = 0;
492-
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
493-
bsize = blocksize(b);
494-
b->b_offset = totsize;
495-
totsize += bsize;
496-
}
497-
extended_arg_recompile = 0;
498-
for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
499-
bsize = b->b_offset;
500-
for (int i = 0; i < b->b_iused; i++) {
501-
cfg_instr *instr = &b->b_instr[i];
502-
int isize = instr_size(instr);
503-
/* jump offsets are computed relative to
504-
* the instruction pointer after fetching
505-
* the jump instruction.
506-
*/
507-
bsize += isize;
508-
if (is_jump(instr)) {
509-
instr->i_oparg = instr->i_target->b_offset;
510-
if (instr->i_oparg < bsize) {
511-
assert(IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
512-
instr->i_oparg = bsize - instr->i_oparg;
513-
}
514-
else {
515-
assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode));
516-
instr->i_oparg -= bsize;
517-
}
518-
if (instr_size(instr) != isize) {
519-
extended_arg_recompile = 1;
520-
}
521-
}
522-
}
523-
}
524-
525-
/* XXX: This is an awful hack that could hurt performance, but
526-
on the bright side it should work until we come up
527-
with a better solution.
528-
529-
The issue is that in the first loop blocksize() is called
530-
which calls instr_size() which requires i_oparg be set
531-
appropriately. There is a bootstrap problem because
532-
i_oparg is calculated in the second loop above.
533-
534-
So we loop until we stop seeing new EXTENDED_ARGs.
535-
The only EXTENDED_ARGs that could be popping up are
536-
ones in jump instructions. So this should converge
537-
fairly quickly.
538-
*/
539-
} while (extended_arg_recompile);
540-
}
541-
542467
int
543468
_PyCfg_ResolveJumps(_PyCfgBuilder *g)
544469
{
545470
RETURN_IF_ERROR(normalize_jumps(g));
546471
assert(no_redundant_jumps(g));
547-
resolve_jump_offsets(g->g_entryblock);
548472
return SUCCESS;
549473
}
550474

0 commit comments

Comments
 (0)