@@ -283,11 +283,16 @@ PyUnstable_Optimizer_NewCounter(void)
 
 typedef struct {
     _PyOptimizerObject base;
+    int traces_executed;
     int instrs_executed;
 } UOpOptimizerObject;
 
 const int MAX_TRACE_LENGTH = 16;
 
+// UOp opcodes are outside the range of bytecodes or pseudo ops
+const int EXIT_TRACE = 512;
+const int SET_IP = 513;
+
 typedef struct {
     int opcode;
     int oparg;
@@ -297,8 +302,6 @@ typedef struct {
     _PyExecutorObject executor;  // Base
     UOpOptimizerObject *optimizer;
     uop_instruction trace[MAX_TRACE_LENGTH];  // TODO: variable length
-    // TODO: drop the rest
-    _Py_CODEUNIT *next_instr;  // The instruction after the trace
 } UOpExecutorObject;
 
 static void
@@ -320,25 +323,60 @@ static _PyInterpreterFrame *
 uop_execute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject **stack_pointer)
 {
     UOpExecutorObject *self = (UOpExecutorObject *)executor;
-    assert(self->trace[0].opcode == LOAD_FAST);
-    int oparg = self->trace[0].oparg;
-    fprintf(stderr, "LOAD_FAST %d\n", oparg);
-    self->optimizer->instrs_executed++;
-    PyObject *value = frame->localsplus[oparg];
-    assert(value != 0);
-    Py_INCREF(value);
-    *stack_pointer++ = value;
-    _PyFrame_SetStackPointer(frame, stack_pointer);
-    frame->prev_instr = self->next_instr - 1;
-    Py_DECREF(self);
-    return frame;
+    self->optimizer->traces_executed++;
+    _Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive - 1;
+    int pc = 0;
+    for (;;) {
+        int opcode = self->trace[pc].opcode;
+        int oparg = self->trace[pc].oparg;
+        pc++;
+        self->optimizer->instrs_executed++;
+        switch (opcode) {
+            // TODO: Tools/cases_generator should generate these from Python/bytecodes.c
+            case LOAD_FAST:
+            {
+                fprintf(stderr, "LOAD_FAST %d\n", oparg);
+                PyObject *value = frame->localsplus[oparg];
+                assert(value != 0);
+                Py_INCREF(value);
+                *stack_pointer++ = value;
+                break;
+            }
+            case SET_IP:
+            {
+                fprintf(stderr, "SET_IP %d\n", oparg);
+                frame->prev_instr = ip_offset + oparg;
+                break;
+            }
+            case EXIT_TRACE:
+            {
+                fprintf(stderr, "EXIT_TRACE\n");
+                _PyFrame_SetStackPointer(frame, stack_pointer);
+                Py_DECREF(self);
+                return frame;
+            }
+            default:
+            {
+                fprintf(stderr, "Unknown uop %d, oparg %d\n", opcode, oparg);
+                Py_FatalError("Unknown uop");
+                abort();  // Unreachable
+                for (;;) {}
+                // Really unreachable
+            }
+        }
+    }
 }
 
 static int
-translate_bytecode_to_trace(_Py_CODEUNIT *instr, uop_instruction *trace, int max_length)
+translate_bytecode_to_trace(
+    PyCodeObject *code,
+    _Py_CODEUNIT *instr,
+    uop_instruction *trace,
+    int max_length)
 {
+    assert(max_length >= 3);  // One op, one SET_IP, one EXIT_TRACE
     int trace_length = 0;
-    while (trace_length < max_length) {
+    while (trace_length + 2 < max_length) {
         if (trace_length >= 1) {
             break;  // Temporarily, only handle one instruction
         }
@@ -350,7 +388,12 @@ translate_bytecode_to_trace(_Py_CODEUNIT *instr, uop_instruction *trace, int max_length)
         instr++;
         trace_length++;
     }
-    return trace_length;
+    int ip_offset = instr - (_Py_CODEUNIT *)code->co_code_adaptive;
+    trace[trace_length].opcode = SET_IP;
+    trace[trace_length].oparg = ip_offset;
+    trace[trace_length + 1].opcode = EXIT_TRACE;
+    trace[trace_length + 1].oparg = 0;
+    return trace_length + 2;
 }
 
 static int
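
Read together, the two hunks above define the trace format: translate_bytecode_to_trace() copies the supported bytecodes into the buffer and always terminates it with a SET_IP carrying the offset of the first untranslated instruction, followed by EXIT_TRACE, and uop_execute() simply replays that array. As a minimal sketch only (uop_instruction, MAX_TRACE_LENGTH, and the uop constants come from the hunks above; the oparg values and the bytecode offset are invented for illustration), a trace recorded for a single LOAD_FAST might look like:

    // Hypothetical contents of executor->trace after translating one LOAD_FAST
    // found at (invented) bytecode offset 10; LOAD_FAST takes one code unit,
    // so the next instruction sits at offset 11.
    uop_instruction example_trace[MAX_TRACE_LENGTH] = {
        { .opcode = LOAD_FAST,  .oparg = 0  },  // copied verbatim from the bytecode
        { .opcode = SET_IP,     .oparg = 11 },  // offset of the instruction after the trace
        { .opcode = EXIT_TRACE, .oparg = 0  },  // hand control back to the regular interpreter
    };
    // uop_execute() pushes the local for LOAD_FAST, updates frame->prev_instr so
    // execution resumes at the SET_IP offset, then saves the stack pointer and
    // returns the frame on EXIT_TRACE.

This fixed SET_IP/EXIT_TRACE tail is why the translator reserves two slots: the `trace_length + 2` loop bound and the `max_length >= 3` assert above.
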
@@ -361,7 +404,7 @@ uop_optimize(
     _PyExecutorObject **exec_ptr)
 {
     uop_instruction trace[MAX_TRACE_LENGTH];
-    int trace_length = translate_bytecode_to_trace(instr, trace, MAX_TRACE_LENGTH);
+    int trace_length = translate_bytecode_to_trace(code, instr, trace, MAX_TRACE_LENGTH);
     if (trace_length <= 0) {
         // Error or nothing translated
         return trace_length;
@@ -374,19 +417,25 @@ uop_optimize(
     Py_INCREF(self);
     executor->optimizer = (UOpOptimizerObject *)self;
     memcpy(executor->trace, trace, trace_length * sizeof(uop_instruction));
-    executor->next_instr = instr + 1;  // Skip the LOAD_FAST!
     *exec_ptr = (_PyExecutorObject *)executor;
     return 1;
 }
 
 static PyObject *
-uop_get_state(PyObject *self, PyObject *args)
+uop_get_traces(PyObject *self, PyObject *args)
+{
+    return PyLong_FromLongLong(((UOpOptimizerObject *)self)->traces_executed);
+}
+
+static PyObject *
+uop_get_instrs(PyObject *self, PyObject *args)
 {
     return PyLong_FromLongLong(((UOpOptimizerObject *)self)->instrs_executed);
 }
 
 static PyMethodDef uop_methods[] = {
-    { "state", uop_get_state, METH_NOARGS, NULL },
+    { "get_traces", uop_get_traces, METH_NOARGS, NULL },
+    { "get_instrs", uop_get_instrs, METH_NOARGS, NULL },
     { NULL, NULL },
 };
 
@@ -409,6 +458,7 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
     opt->base.optimize = uop_optimize;
     opt->base.resume_threshold = UINT16_MAX;
     opt->base.backedge_threshold = 0;
+    opt->traces_executed = 0;
     opt->instrs_executed = 0;
     return (PyObject *)opt;
 }
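
For orientation, here is a minimal sketch of how this optimizer might be driven from C. It assumes the PyUnstable_SetOptimizer() setter from the same unstable optimizer API (not part of this diff), that passing NULL to it restores the default optimizer, that uop_methods is installed as the optimizer type's method table, and an already-initialized interpreter; run_with_uop_optimizer() is a hypothetical helper, not code from the patch:

    #include <Python.h>

    static int
    run_with_uop_optimizer(void)
    {
        // Create the uop optimizer added above and make it the active optimizer.
        PyObject *opt = PyUnstable_Optimizer_NewUOpOptimizer();
        if (opt == NULL) {
            return -1;
        }
        PyUnstable_SetOptimizer((_PyOptimizerObject *)opt);  // assumed unstable-API setter

        // backedge_threshold is 0, so a backward jump in this loop should reach
        // uop_optimize() and, if translation succeeds, uop_execute().
        int rc = PyRun_SimpleString("for i in range(100):\n    x = i\n");

        // Read back the counters this commit exposes as get_traces()/get_instrs().
        PyObject *traces = PyObject_CallMethod(opt, "get_traces", NULL);
        PyObject *instrs = PyObject_CallMethod(opt, "get_instrs", NULL);
        if (traces != NULL && instrs != NULL) {
            fprintf(stderr, "traces=%lld instrs=%lld\n",
                    PyLong_AsLongLong(traces), PyLong_AsLongLong(instrs));
        }
        else {
            PyErr_Clear();
        }
        Py_XDECREF(traces);
        Py_XDECREF(instrs);

        PyUnstable_SetOptimizer(NULL);  // assumed to reinstall the default optimizer
        Py_DECREF(opt);
        return rc;
    }
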