Skip to content

Commit 832876b

Browse files
authored
Add miss stats for specialized instructions. (GH-31108)
1 parent ba650af commit 832876b

File tree

3 files changed

+43
-31
lines changed

3 files changed

+43
-31
lines changed

Python/ceval.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5403,6 +5403,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
54035403
#define MISS_WITH_CACHE(opname) \
54045404
opname ## _miss: \
54055405
{ \
5406+
STAT_INC(opcode, miss); \
54065407
STAT_INC(opname, miss); \
54075408
_PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; \
54085409
cache->counter--; \

Python/specialize.c

Lines changed: 34 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,33 @@
3838
<instr N-1>
3939
*/
4040

41+
/* Map from opcode to adaptive opcode.
42+
Values of zero are ignored. */
43+
static uint8_t adaptive_opcodes[256] = {
44+
[LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
45+
[LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE,
46+
[LOAD_METHOD] = LOAD_METHOD_ADAPTIVE,
47+
[BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE,
48+
[STORE_SUBSCR] = STORE_SUBSCR_ADAPTIVE,
49+
[CALL] = CALL_ADAPTIVE,
50+
[STORE_ATTR] = STORE_ATTR_ADAPTIVE,
51+
[BINARY_OP] = BINARY_OP_ADAPTIVE,
52+
[COMPARE_OP] = COMPARE_OP_ADAPTIVE,
53+
};
54+
55+
/* The number of cache entries required for a "family" of instructions. */
56+
static uint8_t cache_requirements[256] = {
57+
[LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
58+
[LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
59+
[LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
60+
[BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
61+
[STORE_SUBSCR] = 0,
62+
[CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
63+
[STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
64+
[BINARY_OP] = 1, // _PyAdaptiveEntry
65+
[COMPARE_OP] = 1, /* _PyAdaptiveEntry */
66+
};
67+
4168
Py_ssize_t _Py_QuickenedCount = 0;
4269
#ifdef Py_STATS
4370
PyStats _py_stats = { 0 };
@@ -144,7 +171,14 @@ _Py_GetSpecializationStats(void) {
144171
static void
145172
print_spec_stats(FILE *out, OpcodeStats *stats)
146173
{
174+
/* Mark some opcodes as specializable for stats,
175+
* even though we don't specialize them yet. */
176+
fprintf(out, " opcode[%d].specializable : 1\n", FOR_ITER);
177+
fprintf(out, " opcode[%d].specializable : 1\n", UNPACK_SEQUENCE);
147178
for (int i = 0; i < 256; i++) {
179+
if (adaptive_opcodes[i]) {
180+
fprintf(out, " opcode[%d].specializable : 1\n", i);
181+
}
148182
PRINT_STAT(i, specialization.success);
149183
PRINT_STAT(i, specialization.failure);
150184
PRINT_STAT(i, specialization.hit);
@@ -266,33 +300,6 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) {
266300
return quickened[0].entry.zero.cache_count;
267301
}
268302

269-
/* Map from opcode to adaptive opcode.
270-
Values of zero are ignored. */
271-
static uint8_t adaptive_opcodes[256] = {
272-
[LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
273-
[LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE,
274-
[LOAD_METHOD] = LOAD_METHOD_ADAPTIVE,
275-
[BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE,
276-
[STORE_SUBSCR] = STORE_SUBSCR_ADAPTIVE,
277-
[CALL] = CALL_ADAPTIVE,
278-
[STORE_ATTR] = STORE_ATTR_ADAPTIVE,
279-
[BINARY_OP] = BINARY_OP_ADAPTIVE,
280-
[COMPARE_OP] = COMPARE_OP_ADAPTIVE,
281-
};
282-
283-
/* The number of cache entries required for a "family" of instructions. */
284-
static uint8_t cache_requirements[256] = {
285-
[LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
286-
[LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
287-
[LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
288-
[BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
289-
[STORE_SUBSCR] = 0,
290-
[CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
291-
[STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
292-
[BINARY_OP] = 1, // _PyAdaptiveEntry
293-
[COMPARE_OP] = 1, /* _PyAdaptiveEntry */
294-
};
295-
296303
/* Return the oparg for the cache_offset and instruction index.
297304
*
298305
* If no cache is needed then return the original oparg.

Tools/scripts/summarize_stats.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,11 +22,10 @@
2222
pass
2323
opname.append(name)
2424

25-
2625
TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count"
2726

2827
def print_specialization_stats(name, family_stats):
29-
if "specialization.failure" not in family_stats:
28+
if "specializable" not in family_stats:
3029
return
3130
total = sum(family_stats.get(kind, 0) for kind in TOTAL)
3231
if total == 0:
@@ -87,13 +86,18 @@ def main():
8786
for i, opcode_stat in enumerate(opcode_stats):
8887
if "execution_count" in opcode_stat:
8988
count = opcode_stat['execution_count']
90-
counts.append((count, opname[i]))
89+
miss = 0
90+
if "specializable" not in opcode_stat:
91+
miss = opcode_stat.get("specialization.miss")
92+
counts.append((count, opname[i], miss))
9193
total += count
9294
counts.sort(reverse=True)
9395
cummulative = 0
94-
for (count, name) in counts:
96+
for (count, name, miss) in counts:
9597
cummulative += count
9698
print(f"{name}: {count} {100*count/total:0.1f}% {100*cummulative/total:0.1f}%")
99+
if miss:
100+
print(f" Misses: {miss} {100*miss/count:0.1f}%")
97101
print("Specialization stats:")
98102
for i, opcode_stat in enumerate(opcode_stats):
99103
name = opname[i]

0 commit comments

Comments
 (0)