Skip to content

Commit c816503

Browse files
Yhg1s authored and gpshead committed
bpo-38115: Deal with invalid bytecode offsets in lnotab (GH-16079)
Document that lnotab can contain invalid bytecode offsets (because of terrible reasons that are difficult to fix). Make dis.findlinestarts() ignore invalid offsets in lnotab. All other uses of lnotab in CPython (various reimplementations of addr2line or line2addr in Python, C and gdb) already ignore this, because they take an address to look for, instead. Add tests for the result of dis.findlinestarts() on wacky constructs in test_peepholer.py, because it's the easiest place to add them.
1 parent 7774d78 commit c816503

File tree

4 files changed

+77
-2
lines changed

4 files changed

+77
-2
lines changed

Lib/dis.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -454,6 +454,7 @@ def findlinestarts(code):
454454
"""
455455
byte_increments = code.co_lnotab[0::2]
456456
line_increments = code.co_lnotab[1::2]
457+
bytecode_len = len(code.co_code)
457458

458459
lastlineno = None
459460
lineno = code.co_firstlineno
@@ -464,6 +465,10 @@ def findlinestarts(code):
464465
yield (addr, lineno)
465466
lastlineno = lineno
466467
addr += byte_incr
468+
if addr >= bytecode_len:
469+
# The rest of the lnotab byte offsets are past the end of
470+
# the bytecode, so the lines were optimized away.
471+
return
467472
if line_incr >= 0x80:
468473
# line_increments is an array of 8-bit signed integers
469474
line_incr -= 0x100

Lib/test/test_peepholer.py

Lines changed: 68 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,20 @@ def check_jump_targets(self, code):
4040
self.fail(f'{instr.opname} at {instr.offset} '
4141
f'jumps to {tgt.opname} at {tgt.offset}')
4242

43+
def check_lnotab(self, code):
44+
"Check that the lnotab byte offsets are sensible."
45+
code = dis._get_code_object(code)
46+
lnotab = list(dis.findlinestarts(code))
47+
# Don't bother checking if the line info is sensible, because
48+
# most of the line info we can get at comes from lnotab.
49+
min_bytecode = min(t[0] for t in lnotab)
50+
max_bytecode = max(t[0] for t in lnotab)
51+
self.assertGreaterEqual(min_bytecode, 0)
52+
self.assertLess(max_bytecode, len(code.co_code))
53+
# This could conceivably test more (and probably should, as there
54+
# aren't very many tests of lnotab), if peepholer wasn't scheduled
55+
# to be replaced anyway.
56+
4357
def test_unot(self):
4458
# UNARY_NOT POP_JUMP_IF_FALSE --> POP_JUMP_IF_TRUE'
4559
def unot(x):
@@ -48,6 +62,7 @@ def unot(x):
4862
self.assertNotInBytecode(unot, 'UNARY_NOT')
4963
self.assertNotInBytecode(unot, 'POP_JUMP_IF_FALSE')
5064
self.assertInBytecode(unot, 'POP_JUMP_IF_TRUE')
65+
self.check_lnotab(unot)
5166

5267
def test_elim_inversion_of_is_or_in(self):
5368
for line, cmp_op in (
@@ -58,6 +73,7 @@ def test_elim_inversion_of_is_or_in(self):
5873
):
5974
code = compile(line, '', 'single')
6075
self.assertInBytecode(code, 'COMPARE_OP', cmp_op)
76+
self.check_lnotab(code)
6177

6278
def test_global_as_constant(self):
6379
# LOAD_GLOBAL None/True/False --> LOAD_CONST None/True/False
@@ -75,13 +91,15 @@ def h():
7591
for func, elem in ((f, None), (g, True), (h, False)):
7692
self.assertNotInBytecode(func, 'LOAD_GLOBAL')
7793
self.assertInBytecode(func, 'LOAD_CONST', elem)
94+
self.check_lnotab(func)
7895

7996
def f():
8097
'Adding a docstring made this test fail in Py2.5.0'
8198
return None
8299

83100
self.assertNotInBytecode(f, 'LOAD_GLOBAL')
84101
self.assertInBytecode(f, 'LOAD_CONST', None)
102+
self.check_lnotab(f)
85103

86104
def test_while_one(self):
87105
# Skip over: LOAD_CONST trueconst POP_JUMP_IF_FALSE xx
@@ -93,6 +111,7 @@ def f():
93111
self.assertNotInBytecode(f, elem)
94112
for elem in ('JUMP_ABSOLUTE',):
95113
self.assertInBytecode(f, elem)
114+
self.check_lnotab(f)
96115

97116
def test_pack_unpack(self):
98117
for line, elem in (
@@ -104,6 +123,7 @@ def test_pack_unpack(self):
104123
self.assertInBytecode(code, elem)
105124
self.assertNotInBytecode(code, 'BUILD_TUPLE')
106125
self.assertNotInBytecode(code, 'UNPACK_TUPLE')
126+
self.check_lnotab(code)
107127

108128
def test_folding_of_tuples_of_constants(self):
109129
for line, elem in (
@@ -116,6 +136,7 @@ def test_folding_of_tuples_of_constants(self):
116136
code = compile(line,'','single')
117137
self.assertInBytecode(code, 'LOAD_CONST', elem)
118138
self.assertNotInBytecode(code, 'BUILD_TUPLE')
139+
self.check_lnotab(code)
119140

120141
# Long tuples should be folded too.
121142
code = compile(repr(tuple(range(10000))),'','single')
@@ -124,6 +145,7 @@ def test_folding_of_tuples_of_constants(self):
124145
load_consts = [instr for instr in dis.get_instructions(code)
125146
if instr.opname == 'LOAD_CONST']
126147
self.assertEqual(len(load_consts), 2)
148+
self.check_lnotab(code)
127149

128150
# Bug 1053819: Tuple of constants misidentified when presented with:
129151
# . . . opcode_with_arg 100 unary_opcode BUILD_TUPLE 1 . . .
@@ -141,6 +163,7 @@ def crater():
141163
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
142164
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
143165
],)
166+
self.check_lnotab(crater)
144167

145168
def test_folding_of_lists_of_constants(self):
146169
for line, elem in (
@@ -153,6 +176,7 @@ def test_folding_of_lists_of_constants(self):
153176
code = compile(line, '', 'single')
154177
self.assertInBytecode(code, 'LOAD_CONST', elem)
155178
self.assertNotInBytecode(code, 'BUILD_LIST')
179+
self.check_lnotab(code)
156180

157181
def test_folding_of_sets_of_constants(self):
158182
for line, elem in (
@@ -166,6 +190,7 @@ def test_folding_of_sets_of_constants(self):
166190
code = compile(line, '', 'single')
167191
self.assertNotInBytecode(code, 'BUILD_SET')
168192
self.assertInBytecode(code, 'LOAD_CONST', elem)
193+
self.check_lnotab(code)
169194

170195
# Ensure that the resulting code actually works:
171196
def f(a):
@@ -176,9 +201,11 @@ def g(a):
176201

177202
self.assertTrue(f(3))
178203
self.assertTrue(not f(4))
204+
self.check_lnotab(f)
179205

180206
self.assertTrue(not g(3))
181207
self.assertTrue(g(4))
208+
self.check_lnotab(g)
182209

183210

184211
def test_folding_of_binops_on_constants(self):
@@ -203,41 +230,50 @@ def test_folding_of_binops_on_constants(self):
203230
self.assertInBytecode(code, 'LOAD_CONST', elem)
204231
for instr in dis.get_instructions(code):
205232
self.assertFalse(instr.opname.startswith('BINARY_'))
233+
self.check_lnotab(code)
206234

207235
# Verify that unfoldables are skipped
208236
code = compile('a=2+"b"', '', 'single')
209237
self.assertInBytecode(code, 'LOAD_CONST', 2)
210238
self.assertInBytecode(code, 'LOAD_CONST', 'b')
239+
self.check_lnotab(code)
211240

212241
# Verify that large sequences do not result from folding
213242
code = compile('a="x"*10000', '', 'single')
214243
self.assertInBytecode(code, 'LOAD_CONST', 10000)
215244
self.assertNotIn("x"*10000, code.co_consts)
245+
self.check_lnotab(code)
216246
code = compile('a=1<<1000', '', 'single')
217247
self.assertInBytecode(code, 'LOAD_CONST', 1000)
218248
self.assertNotIn(1<<1000, code.co_consts)
249+
self.check_lnotab(code)
219250
code = compile('a=2**1000', '', 'single')
220251
self.assertInBytecode(code, 'LOAD_CONST', 1000)
221252
self.assertNotIn(2**1000, code.co_consts)
253+
self.check_lnotab(code)
222254

223255
def test_binary_subscr_on_unicode(self):
224256
# valid code get optimized
225257
code = compile('"foo"[0]', '', 'single')
226258
self.assertInBytecode(code, 'LOAD_CONST', 'f')
227259
self.assertNotInBytecode(code, 'BINARY_SUBSCR')
260+
self.check_lnotab(code)
228261
code = compile('"\u0061\uffff"[1]', '', 'single')
229262
self.assertInBytecode(code, 'LOAD_CONST', '\uffff')
230263
self.assertNotInBytecode(code,'BINARY_SUBSCR')
264+
self.check_lnotab(code)
231265

232266
# With PEP 393, non-BMP char get optimized
233267
code = compile('"\U00012345"[0]', '', 'single')
234268
self.assertInBytecode(code, 'LOAD_CONST', '\U00012345')
235269
self.assertNotInBytecode(code, 'BINARY_SUBSCR')
270+
self.check_lnotab(code)
236271

237272
# invalid code doesn't get optimized
238273
# out of range
239274
code = compile('"fuu"[10]', '', 'single')
240275
self.assertInBytecode(code, 'BINARY_SUBSCR')
276+
self.check_lnotab(code)
241277

242278
def test_folding_of_unaryops_on_constants(self):
243279
for line, elem in (
@@ -252,13 +288,15 @@ def test_folding_of_unaryops_on_constants(self):
252288
self.assertInBytecode(code, 'LOAD_CONST', elem)
253289
for instr in dis.get_instructions(code):
254290
self.assertFalse(instr.opname.startswith('UNARY_'))
291+
self.check_lnotab(code)
255292

256293
# Check that -0.0 works after marshaling
257294
def negzero():
258295
return -(1.0-1.0)
259296

260-
for instr in dis.get_instructions(code):
297+
for instr in dis.get_instructions(negzero):
261298
self.assertFalse(instr.opname.startswith('UNARY_'))
299+
self.check_lnotab(negzero)
262300

263301
# Verify that unfoldables are skipped
264302
for line, elem, opname in (
@@ -268,6 +306,7 @@ def negzero():
268306
code = compile(line, '', 'single')
269307
self.assertInBytecode(code, 'LOAD_CONST', elem)
270308
self.assertInBytecode(code, opname)
309+
self.check_lnotab(code)
271310

272311
def test_elim_extra_return(self):
273312
# RETURN LOAD_CONST None RETURN --> RETURN
@@ -277,6 +316,7 @@ def f(x):
277316
returns = [instr for instr in dis.get_instructions(f)
278317
if instr.opname == 'RETURN_VALUE']
279318
self.assertEqual(len(returns), 1)
319+
self.check_lnotab(f)
280320

281321
def test_elim_jump_to_return(self):
282322
# JUMP_FORWARD to RETURN --> RETURN
@@ -290,6 +330,7 @@ def f(cond, true_value, false_value):
290330
returns = [instr for instr in dis.get_instructions(f)
291331
if instr.opname == 'RETURN_VALUE']
292332
self.assertEqual(len(returns), 2)
333+
self.check_lnotab(f)
293334

294335
def test_elim_jump_to_uncond_jump(self):
295336
# POP_JUMP_IF_FALSE to JUMP_FORWARD --> POP_JUMP_IF_FALSE to non-jump
@@ -302,6 +343,7 @@ def f():
302343
else:
303344
baz()
304345
self.check_jump_targets(f)
346+
self.check_lnotab(f)
305347

306348
def test_elim_jump_to_uncond_jump2(self):
307349
# POP_JUMP_IF_FALSE to JUMP_ABSOLUTE --> POP_JUMP_IF_FALSE to non-jump
@@ -312,6 +354,7 @@ def f():
312354
or d):
313355
a = foo()
314356
self.check_jump_targets(f)
357+
self.check_lnotab(f)
315358

316359
def test_elim_jump_to_uncond_jump3(self):
317360
# Intentionally use two-line expressions to test issue37213.
@@ -320,18 +363,21 @@ def f(a, b, c):
320363
return ((a and b)
321364
and c)
322365
self.check_jump_targets(f)
366+
self.check_lnotab(f)
323367
self.assertEqual(count_instr_recursively(f, 'JUMP_IF_FALSE_OR_POP'), 2)
324368
# JUMP_IF_TRUE_OR_POP to JUMP_IF_TRUE_OR_POP --> JUMP_IF_TRUE_OR_POP to non-jump
325369
def f(a, b, c):
326370
return ((a or b)
327371
or c)
328372
self.check_jump_targets(f)
373+
self.check_lnotab(f)
329374
self.assertEqual(count_instr_recursively(f, 'JUMP_IF_TRUE_OR_POP'), 2)
330375
# JUMP_IF_FALSE_OR_POP to JUMP_IF_TRUE_OR_POP --> POP_JUMP_IF_FALSE to non-jump
331376
def f(a, b, c):
332377
return ((a and b)
333378
or c)
334379
self.check_jump_targets(f)
380+
self.check_lnotab(f)
335381
self.assertNotInBytecode(f, 'JUMP_IF_FALSE_OR_POP')
336382
self.assertInBytecode(f, 'JUMP_IF_TRUE_OR_POP')
337383
self.assertInBytecode(f, 'POP_JUMP_IF_FALSE')
@@ -340,6 +386,7 @@ def f(a, b, c):
340386
return ((a or b)
341387
and c)
342388
self.check_jump_targets(f)
389+
self.check_lnotab(f)
343390
self.assertNotInBytecode(f, 'JUMP_IF_TRUE_OR_POP')
344391
self.assertInBytecode(f, 'JUMP_IF_FALSE_OR_POP')
345392
self.assertInBytecode(f, 'POP_JUMP_IF_TRUE')
@@ -360,6 +407,7 @@ def f(cond1, cond2):
360407
returns = [instr for instr in dis.get_instructions(f)
361408
if instr.opname == 'RETURN_VALUE']
362409
self.assertLessEqual(len(returns), 6)
410+
self.check_lnotab(f)
363411

364412
def test_elim_jump_after_return2(self):
365413
# Eliminate dead code: jumps immediately after returns can't be reached
@@ -374,13 +422,15 @@ def f(cond1, cond2):
374422
returns = [instr for instr in dis.get_instructions(f)
375423
if instr.opname == 'RETURN_VALUE']
376424
self.assertLessEqual(len(returns), 2)
425+
self.check_lnotab(f)
377426

378427
def test_make_function_doesnt_bail(self):
379428
def f():
380429
def g()->1+1:
381430
pass
382431
return g
383432
self.assertNotInBytecode(f, 'BINARY_ADD')
433+
self.check_lnotab(f)
384434

385435
def test_constant_folding(self):
386436
# Issue #11244: aggressive constant folding.
@@ -401,24 +451,28 @@ def test_constant_folding(self):
401451
self.assertFalse(instr.opname.startswith('UNARY_'))
402452
self.assertFalse(instr.opname.startswith('BINARY_'))
403453
self.assertFalse(instr.opname.startswith('BUILD_'))
454+
self.check_lnotab(code)
404455

405456
def test_in_literal_list(self):
406457
def containtest():
407458
return x in [a, b]
408459
self.assertEqual(count_instr_recursively(containtest, 'BUILD_LIST'), 0)
460+
self.check_lnotab(containtest)
409461

410462
def test_iterate_literal_list(self):
411463
def forloop():
412464
for x in [a, b]:
413465
pass
414466
self.assertEqual(count_instr_recursively(forloop, 'BUILD_LIST'), 0)
467+
self.check_lnotab(forloop)
415468

416469
def test_condition_with_binop_with_bools(self):
417470
def f():
418471
if True or False:
419472
return 1
420473
return 0
421474
self.assertEqual(f(), 1)
475+
self.check_lnotab(f)
422476

423477
def test_if_with_if_expression(self):
424478
# Check bpo-37289
@@ -427,6 +481,19 @@ def f(x):
427481
return True
428482
return False
429483
self.assertTrue(f(True))
484+
self.check_lnotab(f)
485+
486+
def test_trailing_nops(self):
487+
# Check the lnotab of a function that even after trivial
488+
# optimization has trailing nops, which the lnotab adjustment has to
489+
# handle properly (bpo-38115).
490+
def f(x):
491+
while 1:
492+
return 3
493+
while 1:
494+
return 5
495+
return 6
496+
self.check_lnotab(f)
430497

431498

432499
class TestBuglets(unittest.TestCase):
Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Fix a bug in dis.findlinestarts() where it would return invalid bytecode offsets. Document that a code object's co_lnotab can contain invalid bytecode offsets.

Objects/lnotab_notes.txt

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,9 @@ All about co_lnotab, the line number table.
33
Code objects store a field named co_lnotab. This is an array of unsigned bytes
44
disguised as a Python bytes object. It is used to map bytecode offsets to
55
source code line #s for tracebacks and to identify line number boundaries for
6-
line tracing.
6+
line tracing. Because of internals of the peephole optimizer, it's possible
7+
for lnotab to contain bytecode offsets that are no longer valid (for example
8+
if the optimizer removed the last line in a function).
79

810
The array is conceptually a compressed list of
911
(bytecode offset increment, line number increment)

0 commit comments

Comments
 (0)