4
4
#include "pycore_code.h" // write_location_entry_start()
5
5
#include "pycore_compile.h"
6
6
#include "pycore_opcode.h" // _PyOpcode_Caches[] and opcode category macros
7
+ #include "pycore_opcode_utils.h" // IS_BACKWARDS_JUMP_OPCODE
7
8
#include "opcode_metadata.h" // IS_PSEUDO_INSTR
8
9
9
10
@@ -34,6 +35,18 @@ same_location(location a, location b)
34
35
a .end_col_offset == b .end_col_offset ;
35
36
}
36
37
38
+ static int
39
+ instr_size (instruction * instr )
40
+ {
41
+ int opcode = instr -> i_opcode ;
42
+ int oparg = instr -> i_oparg ;
43
+ assert (!IS_PSEUDO_INSTR (opcode ));
44
+ assert (OPCODE_HAS_ARG (opcode ) || oparg == 0 );
45
+ int extended_args = (0xFFFFFF < oparg ) + (0xFFFF < oparg ) + (0xFF < oparg );
46
+ int caches = _PyOpcode_Caches [opcode ];
47
+ return extended_args + 1 + caches ;
48
+ }
49
+
37
50
struct assembler {
38
51
PyObject * a_bytecode ; /* bytes containing bytecode */
39
52
int a_offset ; /* offset into bytecode */
@@ -118,6 +131,7 @@ assemble_emit_exception_table_item(struct assembler *a, int value, int msb)
118
131
119
132
static int
120
133
assemble_emit_exception_table_entry (struct assembler * a , int start , int end ,
134
+ int handler_offset ,
121
135
_PyCompile_ExceptHandlerInfo * handler )
122
136
{
123
137
Py_ssize_t len = PyBytes_GET_SIZE (a -> a_except_table );
@@ -126,7 +140,7 @@ assemble_emit_exception_table_entry(struct assembler *a, int start, int end,
126
140
}
127
141
int size = end - start ;
128
142
assert (end > start );
129
- int target = handler -> h_offset ;
143
+ int target = handler_offset ;
130
144
int depth = handler -> h_startdepth - 1 ;
131
145
if (handler -> h_preserve_lasti > 0 ) {
132
146
depth -= 1 ;
@@ -145,24 +159,30 @@ assemble_exception_table(struct assembler *a, instr_sequence *instrs)
145
159
{
146
160
int ioffset = 0 ;
147
161
_PyCompile_ExceptHandlerInfo handler ;
148
- handler .h_offset = -1 ;
162
+ handler .h_label = -1 ;
149
163
handler .h_startdepth = -1 ;
150
164
handler .h_preserve_lasti = -1 ;
151
165
int start = -1 ;
152
166
for (int i = 0 ; i < instrs -> s_used ; i ++ ) {
153
167
instruction * instr = & instrs -> s_instrs [i ];
154
- if (instr -> i_except_handler_info .h_offset != handler .h_offset ) {
155
- if (handler .h_offset >= 0 ) {
168
+ if (instr -> i_except_handler_info .h_label != handler .h_label ) {
169
+ if (handler .h_label >= 0 ) {
170
+ int handler_offset = instrs -> s_instrs [handler .h_label ].i_offset ;
156
171
RETURN_IF_ERROR (
157
- assemble_emit_exception_table_entry (a , start , ioffset , & handler ));
172
+ assemble_emit_exception_table_entry (a , start , ioffset ,
173
+ handler_offset ,
174
+ & handler ));
158
175
}
159
176
start = ioffset ;
160
177
handler = instr -> i_except_handler_info ;
161
178
}
162
- ioffset += _PyCompile_InstrSize (instr -> i_opcode , instr -> i_oparg );
179
+ ioffset += instr_size (instr );
163
180
}
164
- if (handler .h_offset >= 0 ) {
165
- RETURN_IF_ERROR (assemble_emit_exception_table_entry (a , start , ioffset , & handler ));
181
+ if (handler .h_label >= 0 ) {
182
+ int handler_offset = instrs -> s_instrs [handler .h_label ].i_offset ;
183
+ RETURN_IF_ERROR (assemble_emit_exception_table_entry (a , start , ioffset ,
184
+ handler_offset ,
185
+ & handler ));
166
186
}
167
187
return SUCCESS ;
168
188
}
@@ -329,7 +349,7 @@ assemble_location_info(struct assembler *a, instr_sequence *instrs,
329
349
loc = instr -> i_loc ;
330
350
size = 0 ;
331
351
}
332
- size += _PyCompile_InstrSize (instr -> i_opcode , instr -> i_oparg );
352
+ size += instr_size (instr );
333
353
}
334
354
RETURN_IF_ERROR (assemble_emit_location (a , loc , size ));
335
355
return SUCCESS ;
@@ -385,7 +405,7 @@ assemble_emit_instr(struct assembler *a, instruction *instr)
385
405
Py_ssize_t len = PyBytes_GET_SIZE (a -> a_bytecode );
386
406
_Py_CODEUNIT * code ;
387
407
388
- int size = _PyCompile_InstrSize (instr -> i_opcode , instr -> i_oparg );
408
+ int size = instr_size (instr );
389
409
if (a -> a_offset + size >= len / (int )sizeof (_Py_CODEUNIT )) {
390
410
if (len > PY_SSIZE_T_MAX / 2 ) {
391
411
return ERROR ;
@@ -585,12 +605,83 @@ makecode(_PyCompile_CodeUnitMetadata *umd, struct assembler *a, PyObject *const_
585
605
return co ;
586
606
}
587
607
608
+ static int
609
+ resolve_jump_offsets (instr_sequence * instrs )
610
+ {
611
+ /* Compute the size of each instruction and fixup jump args.
612
+ * Replace instruction index with position in bytecode.
613
+ */
614
+
615
+ for (int i = 0 ; i < instrs -> s_used ; i ++ ) {
616
+ instruction * instr = & instrs -> s_instrs [i ];
617
+ if (OPCODE_HAS_JUMP (instr -> i_opcode )) {
618
+ instr -> i_target = instr -> i_oparg ;
619
+ }
620
+ }
621
+
622
+ int extended_arg_recompile ;
623
+
624
+ do {
625
+ int totsize = 0 ;
626
+ for (int i = 0 ; i < instrs -> s_used ; i ++ ) {
627
+ instruction * instr = & instrs -> s_instrs [i ];
628
+ instr -> i_offset = totsize ;
629
+ int isize = instr_size (instr );
630
+ totsize += isize ;
631
+ }
632
+ extended_arg_recompile = 0 ;
633
+
634
+ int offset = 0 ;
635
+ for (int i = 0 ; i < instrs -> s_used ; i ++ ) {
636
+ instruction * instr = & instrs -> s_instrs [i ];
637
+ int isize = instr_size (instr );
638
+ /* jump offsets are computed relative to
639
+ * the instruction pointer after fetching
640
+ * the jump instruction.
641
+ */
642
+ offset += isize ;
643
+ if (OPCODE_HAS_JUMP (instr -> i_opcode )) {
644
+ instruction * target = & instrs -> s_instrs [instr -> i_target ];
645
+ instr -> i_oparg = target -> i_offset ;
646
+ if (instr -> i_oparg < offset ) {
647
+ assert (IS_BACKWARDS_JUMP_OPCODE (instr -> i_opcode ));
648
+ instr -> i_oparg = offset - instr -> i_oparg ;
649
+ }
650
+ else {
651
+ assert (!IS_BACKWARDS_JUMP_OPCODE (instr -> i_opcode ));
652
+ instr -> i_oparg = instr -> i_oparg - offset ;
653
+ }
654
+ if (instr_size (instr ) != isize ) {
655
+ extended_arg_recompile = 1 ;
656
+ }
657
+ }
658
+ }
659
+ /* XXX: This is an awful hack that could hurt performance, but
660
+ on the bright side it should work until we come up
661
+ with a better solution.
662
+
663
+ The issue is that in the first loop instr_size() is
664
+ called, and it requires i_oparg be set appropriately.
665
+ There is a bootstrap problem because i_oparg is
666
+ calculated in the second loop above.
667
+
668
+ So we loop until we stop seeing new EXTENDED_ARGs.
669
+ The only EXTENDED_ARGs that could be popping up are
670
+ ones in jump instructions. So this should converge
671
+ fairly quickly.
672
+ */
673
+ } while (extended_arg_recompile );
674
+ return SUCCESS ;
675
+ }
588
676
589
677
PyCodeObject *
590
678
_PyAssemble_MakeCodeObject (_PyCompile_CodeUnitMetadata * umd , PyObject * const_cache ,
591
679
PyObject * consts , int maxdepth , instr_sequence * instrs ,
592
680
int nlocalsplus , int code_flags , PyObject * filename )
593
681
{
682
+ if (resolve_jump_offsets (instrs ) < 0 ) {
683
+ return NULL ;
684
+ }
594
685
PyCodeObject * co = NULL ;
595
686
596
687
struct assembler a ;
0 commit comments