Skip to content

Commit 047e427

Browse files
authored
[mypyc] Foundational support for tuple literals (+ None and bool) (#10041)
Add support for loading tuple literals using `LoadLiteral`. The literal tuple objects will be constructed during module import time, similar to other literals. Only tuples containing items of supported literal types can be represented (this includes other tuples). Add `None`, `True` and `False` to the literals array so that they can be used as tuple literal items. Currently tuple literals aren't used for anything. I added some unit tests to check parts of the implementation. The primary use case I have in mind is supporting vectorcall APIs which expect a tuple of keyword argument names. I will implement this in a separate PR, which will also add some end-to-end testing for tuple literals. Tuple literals could also be used to avoid repeatedly constructing tuples with literal values in other contexts. Use array-based encoding for tuple literal values. We use the literal object array introduced in #10040 to allow a simple integer-based encoding of heterogeneous tuples. For example, tuple `('x', 5)` could be encoded like this as three integers:
* 2 (length of tuple)
* 123 (index of literal `'x'`)
* 345 (index of literal `5`)
1 parent 7ec1455 commit 047e427

File tree

7 files changed

+148
-25
lines changed

7 files changed

+148
-25
lines changed

mypyc/codegen/emitmodule.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,9 @@ def generate_literal_tables(self) -> None:
643643
# Descriptions of complex literals
644644
init_complex = c_array_initializer(literals.encoded_complex_values())
645645
self.declare_global('const double []', 'CPyLit_Complex', initializer=init_complex)
646+
# Descriptions of tuple literals
647+
init_tuple = c_array_initializer(literals.encoded_tuple_values())
648+
self.declare_global('const int []', 'CPyLit_Tuple', initializer=init_tuple)
646649

647650
def generate_export_table(self, decl_emitter: Emitter, code_emitter: Emitter) -> None:
648651
"""Generate the declaration and definition of the group's export struct.
@@ -816,7 +819,7 @@ def generate_globals_init(self, emitter: Emitter) -> None:
816819
for symbol, fixup in self.simple_inits:
817820
emitter.emit_line('{} = {};'.format(symbol, fixup))
818821

819-
values = 'CPyLit_Str, CPyLit_Bytes, CPyLit_Int, CPyLit_Float, CPyLit_Complex'
822+
values = 'CPyLit_Str, CPyLit_Bytes, CPyLit_Int, CPyLit_Float, CPyLit_Complex, CPyLit_Tuple'
820823
emitter.emit_lines('if (CPyStatics_Initialize(CPyStatics, {}) < 0) {{'.format(values),
821824
'return -1;',
822825
'}')

mypyc/codegen/literals.py

Lines changed: 70 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,15 @@
1-
from typing import Dict, List, Union
1+
from typing import Dict, List, Union, Tuple, Any, cast
2+
3+
from typing_extensions import Final
4+
5+
6+
# Supported Python literal types. All tuple items must have supported
7+
# literal types as well, but we can't represent the type precisely.
8+
LiteralValue = Union[str, bytes, int, bool, float, complex, Tuple[object, ...], None]
9+
10+
11+
# Some literals are singletons and handled specially (None, False and True)
12+
NUM_SINGLETONS = 3 # type: Final
213

314

415
class Literals:
@@ -11,9 +22,13 @@ def __init__(self) -> None:
1122
self.int_literals = {} # type: Dict[int, int]
1223
self.float_literals = {} # type: Dict[float, int]
1324
self.complex_literals = {} # type: Dict[complex, int]
25+
self.tuple_literals = {} # type: Dict[Tuple[object, ...], int]
1426

15-
def record_literal(self, value: Union[str, bytes, int, float, complex]) -> None:
27+
def record_literal(self, value: LiteralValue) -> None:
1628
"""Ensure that the literal value is available in generated code."""
29+
if value is None or value is True or value is False:
30+
# These are special cased and always present
31+
return
1732
if isinstance(value, str):
1833
str_literals = self.str_literals
1934
if value not in str_literals:
@@ -34,15 +49,29 @@ def record_literal(self, value: Union[str, bytes, int, float, complex]) -> None:
3449
complex_literals = self.complex_literals
3550
if value not in complex_literals:
3651
complex_literals[value] = len(complex_literals)
52+
elif isinstance(value, tuple):
53+
tuple_literals = self.tuple_literals
54+
if value not in tuple_literals:
55+
for item in value:
56+
self.record_literal(cast(Any, item))
57+
tuple_literals[value] = len(tuple_literals)
3758
else:
3859
assert False, 'invalid literal: %r' % value
3960

40-
def literal_index(self, value: Union[str, bytes, int, float, complex]) -> int:
61+
def literal_index(self, value: LiteralValue) -> int:
4162
"""Return the index to the literals array for given value."""
42-
# The array contains first all str values, followed by bytes values, etc.
63+
# The array contains first None and booleans, followed by all str values,
64+
# followed by bytes values, etc.
65+
if value is None:
66+
return 0
67+
elif value is False:
68+
return 1
69+
elif value is True:
70+
return 2
71+
n = NUM_SINGLETONS
4372
if isinstance(value, str):
44-
return self.str_literals[value]
45-
n = len(self.str_literals)
73+
return n + self.str_literals[value]
74+
n += len(self.str_literals)
4675
if isinstance(value, bytes):
4776
return n + self.bytes_literals[value]
4877
n += len(self.bytes_literals)
@@ -54,11 +83,16 @@ def literal_index(self, value: Union[str, bytes, int, float, complex]) -> int:
5483
n += len(self.float_literals)
5584
if isinstance(value, complex):
5685
return n + self.complex_literals[value]
86+
n += len(self.complex_literals)
87+
if isinstance(value, tuple):
88+
return n + self.tuple_literals[value]
5789
assert False, 'invalid literal: %r' % value
5890

5991
def num_literals(self) -> int:
60-
return (len(self.str_literals) + len(self.bytes_literals) + len(self.int_literals) +
61-
len(self.float_literals) + len(self.complex_literals))
92+
# The first three are for None, False and True (in that index order)
93+
return (NUM_SINGLETONS + len(self.str_literals) + len(self.bytes_literals) +
94+
len(self.int_literals) + len(self.float_literals) + len(self.complex_literals) +
95+
len(self.tuple_literals))
6296

6397
# The following methods return the C encodings of literal values
6498
# of different types
@@ -78,6 +112,34 @@ def encoded_float_values(self) -> List[str]:
78112
def encoded_complex_values(self) -> List[str]:
79113
return encode_complex_values(self.complex_literals)
80114

115+
def encoded_tuple_values(self) -> List[str]:
116+
"""Encode tuple values into a C array.
117+
118+
The format of the result is like this:
119+
120+
<number of tuples>
121+
<length of the first tuple>
122+
<literal index of first item>
123+
...
124+
<literal index of last item>
125+
<length of the second tuple>
126+
...
127+
"""
128+
values = self.tuple_literals
129+
value_by_index = {}
130+
for value, index in values.items():
131+
value_by_index[index] = value
132+
result = []
133+
num = len(values)
134+
result.append(str(num))
135+
for i in range(num):
136+
value = value_by_index[i]
137+
result.append(str(len(value)))
138+
for item in value:
139+
index = self.literal_index(cast(Any, item))
140+
result.append(str(index))
141+
return result
142+
81143

82144
def encode_str_values(values: Dict[str, int]) -> List[bytes]:
83145
value_by_index = {}

mypyc/ir/ops.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -500,20 +500,24 @@ class LoadLiteral(RegisterOp):
500500
This is used to load a static PyObject * value corresponding to
501501
a literal of one of the supported types.
502502
503-
NOTE: For int literals, both int_rprimitive (CPyTagged) and
504-
object_primitive (PyObject *) are supported as types. However,
505-
when using int_rprimitive, the value must *not* be small enough
506-
to fit in an unboxed integer.
503+
Tuple literals must contain only valid literal values as items.
507504
508505
NOTE: You can use this to load boxed (Python) int objects. Use
509506
Integer to load unboxed, tagged integers or fixed-width,
510507
low-level integers.
508+
509+
For int literals, both int_rprimitive (CPyTagged) and
510+
object_rprimitive (PyObject *) are supported as rtype. However,
511+
when using int_rprimitive, the value must *not* be small enough
512+
to fit in an unboxed integer.
511513
"""
512514

513515
error_kind = ERR_NEVER
514516
is_borrowed = True
515517

516-
def __init__(self, value: Union[str, bytes, int, float, complex], rtype: RType) -> None:
518+
def __init__(self,
519+
value: Union[None, str, bytes, bool, int, float, complex, Tuple[object, ...]],
520+
rtype: RType) -> None:
517521
self.value = value
518522
self.type = rtype
519523

mypyc/irbuild/expression.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@
1717
from mypy.types import TupleType, get_proper_type, Instance
1818

1919
from mypyc.common import MAX_SHORT_INT
20-
from mypyc.ir.ops import (
21-
Value, Register, TupleGet, TupleSet, BasicBlock, Assign, LoadAddress
22-
)
20+
from mypyc.ir.ops import Value, Register, TupleGet, TupleSet, BasicBlock, Assign, LoadAddress
2321
from mypyc.ir.rtypes import (
2422
RTuple, object_rprimitive, is_none_rprimitive, int_rprimitive, is_int_rprimitive
2523
)

mypyc/lib-rt/CPy.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,8 @@ int CPyArg_ParseStackAndKeywordsSimple(PyObject *const *args, Py_ssize_t nargs,
523523

524524
int CPySequence_CheckUnpackCount(PyObject *sequence, Py_ssize_t expected);
525525
int CPyStatics_Initialize(PyObject **statics, const char *strings, const char *bytestrings,
526-
const char *ints, const double *floats, const double *complex_numbers);
526+
const char *ints, const double *floats, const double *complex_numbers,
527+
const int *tuples);
527528

528529

529530
#ifdef __cplusplus

mypyc/lib-rt/misc_ops.c

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,16 @@ int CPyStatics_Initialize(PyObject **statics,
528528
const char *bytestrings,
529529
const char *ints,
530530
const double *floats,
531-
const double *complex_numbers) {
531+
const double *complex_numbers,
532+
const int *tuples) {
533+
PyObject **result = statics;
534+
// Start with some hard-coded values
535+
*result++ = Py_None;
536+
Py_INCREF(Py_None);
537+
*result++ = Py_False;
538+
Py_INCREF(Py_False);
539+
*result++ = Py_True;
540+
Py_INCREF(Py_True);
532541
if (strings) {
533542
size_t num;
534543
strings = parse_int(strings, &num);
@@ -540,7 +549,7 @@ int CPyStatics_Initialize(PyObject **statics,
540549
return -1;
541550
}
542551
PyUnicode_InternInPlace(&obj);
543-
*statics++ = obj;
552+
*result++ = obj;
544553
strings += len;
545554
}
546555
}
@@ -554,7 +563,7 @@ int CPyStatics_Initialize(PyObject **statics,
554563
if (obj == NULL) {
555564
return -1;
556565
}
557-
*statics++ = obj;
566+
*result++ = obj;
558567
bytestrings += len;
559568
}
560569
}
@@ -569,7 +578,7 @@ int CPyStatics_Initialize(PyObject **statics,
569578
}
570579
ints = end;
571580
ints++;
572-
*statics++ = obj;
581+
*result++ = obj;
573582
}
574583
}
575584
if (floats) {
@@ -579,7 +588,7 @@ int CPyStatics_Initialize(PyObject **statics,
579588
if (obj == NULL) {
580589
return -1;
581590
}
582-
*statics++ = obj;
591+
*result++ = obj;
583592
}
584593
}
585594
if (complex_numbers) {
@@ -591,7 +600,24 @@ int CPyStatics_Initialize(PyObject **statics,
591600
if (obj == NULL) {
592601
return -1;
593602
}
594-
*statics++ = obj;
603+
*result++ = obj;
604+
}
605+
}
606+
if (tuples) {
607+
int num = *tuples++;
608+
while (num-- > 0) {
609+
int num_items = *tuples++;
610+
PyObject *obj = PyTuple_New(num_items);
611+
if (obj == NULL) {
612+
return -1;
613+
}
614+
int i;
615+
for (i = 0; i < num_items; i++) {
616+
PyObject *item = statics[*tuples++];
617+
Py_INCREF(item);
618+
PyTuple_SET_ITEM(obj, i, item);
619+
}
620+
*result++ = obj;
595621
}
596622
}
597623
return 0;

mypyc/test/test_literals.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import unittest
44

5-
from mypyc.codegen.literals import format_str_literal
5+
from mypyc.codegen.literals import format_str_literal, Literals
66

77

88
class TestLiterals(unittest.TestCase):
@@ -12,3 +12,32 @@ def test_format_str_literal(self) -> None:
1212
assert format_str_literal('x' * 127) == b'\x7f' + b'x' * 127
1313
assert format_str_literal('x' * 128) == b'\x81\x00' + b'x' * 128
1414
assert format_str_literal('x' * 131) == b'\x81\x03' + b'x' * 131
15+
16+
def test_simple_literal_index(self) -> None:
17+
lit = Literals()
18+
lit.record_literal(1)
19+
lit.record_literal('y')
20+
lit.record_literal(True)
21+
lit.record_literal(None)
22+
lit.record_literal(False)
23+
assert lit.literal_index(None) == 0
24+
assert lit.literal_index(False) == 1
25+
assert lit.literal_index(True) == 2
26+
assert lit.literal_index('y') == 3
27+
assert lit.literal_index(1) == 4
28+
29+
def test_tuple_literal(self) -> None:
30+
lit = Literals()
31+
lit.record_literal((1, 'y', None, (b'a', 'b')))
32+
lit.record_literal((b'a', 'b'))
33+
lit.record_literal(())
34+
assert lit.literal_index((b'a', 'b')) == 7
35+
assert lit.literal_index((1, 'y', None, (b'a', 'b'))) == 8
36+
assert lit.literal_index(()) == 9
37+
print(lit.encoded_tuple_values())
38+
assert lit.encoded_tuple_values() == [
39+
'3', # Number of tuples
40+
'2', '5', '4', # First tuple (length=2)
41+
'4', '6', '3', '0', '7', # Second tuple (length=4)
42+
'0', # Third tuple (length=0)
43+
]

0 commit comments

Comments
 (0)