Skip to content

[mypyc] Foundational support for tuple literals (+ None and bool), try 2 #10148

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion mypyc/codegen/emitmodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,9 @@ def generate_literal_tables(self) -> None:
# Descriptions of complex literals
init_complex = c_array_initializer(literals.encoded_complex_values())
self.declare_global('const double []', 'CPyLit_Complex', initializer=init_complex)
# Descriptions of tuple literals
init_tuple = c_array_initializer(literals.encoded_tuple_values())
self.declare_global('const int []', 'CPyLit_Tuple', initializer=init_tuple)

def generate_export_table(self, decl_emitter: Emitter, code_emitter: Emitter) -> None:
"""Generate the declaration and definition of the group's export struct.
Expand Down Expand Up @@ -816,7 +819,7 @@ def generate_globals_init(self, emitter: Emitter) -> None:
for symbol, fixup in self.simple_inits:
emitter.emit_line('{} = {};'.format(symbol, fixup))

values = 'CPyLit_Str, CPyLit_Bytes, CPyLit_Int, CPyLit_Float, CPyLit_Complex'
values = 'CPyLit_Str, CPyLit_Bytes, CPyLit_Int, CPyLit_Float, CPyLit_Complex, CPyLit_Tuple'
emitter.emit_lines('if (CPyStatics_Initialize(CPyStatics, {}) < 0) {{'.format(values),
'return -1;',
'}')
Expand Down
78 changes: 70 additions & 8 deletions mypyc/codegen/literals.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
from typing import Dict, List, Union
from typing import Dict, List, Union, Tuple, Any, cast

from typing_extensions import Final


# Supported Python literal types. All tuple items must have supported
# literal types as well, but we can't represent the type precisely.
LiteralValue = Union[str, bytes, int, bool, float, complex, Tuple[object, ...], None]


# Some literals are singletons and handled specially (None, False and True)
NUM_SINGLETONS = 3 # type: Final


class Literals:
Expand All @@ -11,9 +22,13 @@ def __init__(self) -> None:
self.int_literals = {} # type: Dict[int, int]
self.float_literals = {} # type: Dict[float, int]
self.complex_literals = {} # type: Dict[complex, int]
self.tuple_literals = {} # type: Dict[Tuple[object, ...], int]

def record_literal(self, value: Union[str, bytes, int, float, complex]) -> None:
def record_literal(self, value: LiteralValue) -> None:
"""Ensure that the literal value is available in generated code."""
if value is None or value is True or value is False:
# These are special cased and always present
return
if isinstance(value, str):
str_literals = self.str_literals
if value not in str_literals:
Expand All @@ -34,15 +49,29 @@ def record_literal(self, value: Union[str, bytes, int, float, complex]) -> None:
complex_literals = self.complex_literals
if value not in complex_literals:
complex_literals[value] = len(complex_literals)
elif isinstance(value, tuple):
tuple_literals = self.tuple_literals
if value not in tuple_literals:
for item in value:
self.record_literal(cast(Any, item))
tuple_literals[value] = len(tuple_literals)
else:
assert False, 'invalid literal: %r' % value

def literal_index(self, value: Union[str, bytes, int, float, complex]) -> int:
def literal_index(self, value: LiteralValue) -> int:
"""Return the index to the literals array for given value."""
# The array contains first all str values, followed by bytes values, etc.
# The array contains first None and booleans, followed by all str values,
# followed by bytes values, etc.
if value is None:
return 0
elif value is False:
return 1
elif value is True:
return 2
n = NUM_SINGLETONS
if isinstance(value, str):
return self.str_literals[value]
n = len(self.str_literals)
return n + self.str_literals[value]
n += len(self.str_literals)
if isinstance(value, bytes):
return n + self.bytes_literals[value]
n += len(self.bytes_literals)
Expand All @@ -54,11 +83,16 @@ def literal_index(self, value: Union[str, bytes, int, float, complex]) -> int:
n += len(self.float_literals)
if isinstance(value, complex):
return n + self.complex_literals[value]
n += len(self.complex_literals)
if isinstance(value, tuple):
return n + self.tuple_literals[value]
assert False, 'invalid literal: %r' % value

def num_literals(self) -> int:
return (len(self.str_literals) + len(self.bytes_literals) + len(self.int_literals) +
len(self.float_literals) + len(self.complex_literals))
# The first three are for None, False and True
return (NUM_SINGLETONS + len(self.str_literals) + len(self.bytes_literals) +
len(self.int_literals) + len(self.float_literals) + len(self.complex_literals) +
len(self.tuple_literals))

# The following methods return the C encodings of literal values
# of different types
Expand All @@ -78,6 +112,34 @@ def encoded_float_values(self) -> List[str]:
def encoded_complex_values(self) -> List[str]:
return _encode_complex_values(self.complex_literals)

def encoded_tuple_values(self) -> List[str]:
    """Encode tuple values into a C array.

    The format of the result is like this:

       <number of tuples>
       <length of the first tuple>
       <literal index of first item>
       ...
       <literal index of last item>
       <length of the second tuple>
       ...

    Every entry is the decimal string form of an integer. Tuples are
    emitted in the order of their recorded indexes, and each item is
    represented by the index that literal_index() returns for it.
    """
    values = self.tuple_literals
    # Invert the value -> index mapping so tuples can be emitted in index order.
    value_by_index = {index: value for value, index in values.items()}
    num = len(values)
    result = [str(num)]
    for i in range(num):
        value = value_by_index[i]
        result.append(str(len(value)))
        # Items are only typed as 'object', so cast before looking up their index.
        result.extend(str(self.literal_index(cast(Any, item))) for item in value)
    return result


def _encode_str_values(values: Dict[str, int]) -> List[bytes]:
value_by_index = {}
Expand Down
14 changes: 9 additions & 5 deletions mypyc/ir/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,20 +500,24 @@ class LoadLiteral(RegisterOp):
This is used to load a static PyObject * value corresponding to
a literal of one of the supported types.

NOTE: For int literals, both int_rprimitive (CPyTagged) and
object_primitive (PyObject *) are supported as types. However,
when using int_rprimitive, the value must *not* be small enough
to fit in an unboxed integer.
Tuple literals must contain only valid literal values as items.

NOTE: You can use this to load boxed (Python) int objects. Use
Integer to load unboxed, tagged integers or fixed-width,
low-level integers.

For int literals, both int_rprimitive (CPyTagged) and
object_rprimitive (PyObject *) are supported as rtype. However,
when using int_rprimitive, the value must *not* be small enough
to fit in an unboxed integer.
"""

error_kind = ERR_NEVER
is_borrowed = True

def __init__(self, value: Union[str, bytes, int, float, complex], rtype: RType) -> None:
def __init__(self,
value: Union[None, str, bytes, bool, int, float, complex, Tuple[object, ...]],
rtype: RType) -> None:
self.value = value
self.type = rtype

Expand Down
4 changes: 1 addition & 3 deletions mypyc/irbuild/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@
from mypy.types import TupleType, get_proper_type, Instance

from mypyc.common import MAX_SHORT_INT
from mypyc.ir.ops import (
Value, Register, TupleGet, TupleSet, BasicBlock, Assign, LoadAddress
)
from mypyc.ir.ops import Value, Register, TupleGet, TupleSet, BasicBlock, Assign, LoadAddress
from mypyc.ir.rtypes import (
RTuple, object_rprimitive, is_none_rprimitive, int_rprimitive, is_int_rprimitive
)
Expand Down
3 changes: 2 additions & 1 deletion mypyc/lib-rt/CPy.h
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,8 @@ int CPyStatics_Initialize(PyObject **statics,
const char * const *bytestrings,
const char * const *ints,
const double *floats,
const double *complex_numbers);
const double *complex_numbers,
const int *tuples);


#ifdef __cplusplus
Expand Down
38 changes: 32 additions & 6 deletions mypyc/lib-rt/misc_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,16 @@ int CPyStatics_Initialize(PyObject **statics,
const char * const *bytestrings,
const char * const *ints,
const double *floats,
const double *complex_numbers) {
const double *complex_numbers,
const int *tuples) {
PyObject **result = statics;
// Start with some hard-coded values
*result++ = Py_None;
Py_INCREF(Py_None);
*result++ = Py_False;
Py_INCREF(Py_False);
*result++ = Py_True;
Py_INCREF(Py_True);
if (strings) {
for (; **strings != '\0'; strings++) {
size_t num;
Expand All @@ -542,7 +551,7 @@ int CPyStatics_Initialize(PyObject **statics,
return -1;
}
PyUnicode_InternInPlace(&obj);
*statics++ = obj;
*result++ = obj;
data += len;
}
}
Expand All @@ -559,7 +568,7 @@ int CPyStatics_Initialize(PyObject **statics,
if (obj == NULL) {
return -1;
}
*statics++ = obj;
*result++ = obj;
data += len;
}
}
Expand All @@ -577,7 +586,7 @@ int CPyStatics_Initialize(PyObject **statics,
}
data = end;
data++;
*statics++ = obj;
*result++ = obj;
}
}
}
Expand All @@ -588,7 +597,7 @@ int CPyStatics_Initialize(PyObject **statics,
if (obj == NULL) {
return -1;
}
*statics++ = obj;
*result++ = obj;
}
}
if (complex_numbers) {
Expand All @@ -600,7 +609,24 @@ int CPyStatics_Initialize(PyObject **statics,
if (obj == NULL) {
return -1;
}
*statics++ = obj;
*result++ = obj;
}
}
if (tuples) {
int num = *tuples++;
while (num-- > 0) {
int num_items = *tuples++;
PyObject *obj = PyTuple_New(num_items);
if (obj == NULL) {
return -1;
}
int i;
for (i = 0; i < num_items; i++) {
PyObject *item = statics[*tuples++];
Py_INCREF(item);
PyTuple_SET_ITEM(obj, i, item);
}
*result++ = obj;
}
}
return 0;
Expand Down
31 changes: 30 additions & 1 deletion mypyc/test/test_literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import unittest

from mypyc.codegen.literals import (
format_str_literal, _encode_str_values, _encode_bytes_values, _encode_int_values
Literals, format_str_literal, _encode_str_values, _encode_bytes_values, _encode_int_values
)


Expand Down Expand Up @@ -56,3 +56,32 @@ def test_encode_int_values(self) -> None:
b'\x016' + b'0' * 100,
b''
]

def test_simple_literal_index(self) -> None:
    """Check index layout: None, False, True first, then str, then int values."""
    literals = Literals()
    # Record values in an order unrelated to their final positions.
    literals.record_literal(1)
    literals.record_literal('y')
    literals.record_literal(True)
    literals.record_literal(None)
    literals.record_literal(False)
    indexes = [
        literals.literal_index(None),
        literals.literal_index(False),
        literals.literal_index(True),
        literals.literal_index('y'),
        literals.literal_index(1),
    ]
    assert indexes == [0, 1, 2, 3, 4]

def test_tuple_literal(self) -> None:
    """Check index assignment and C encoding of (nested) tuple literals."""
    lit = Literals()
    lit.record_literal((1, 'y', None, (b'a', 'b')))
    lit.record_literal((b'a', 'b'))
    lit.record_literal(())
    # Items are recorded before the tuple that contains them, so the nested
    # tuple receives the first tuple index even though it is recorded
    # explicitly only after the outer tuple.
    assert lit.literal_index((b'a', 'b')) == 7
    assert lit.literal_index((1, 'y', None, (b'a', 'b'))) == 8
    assert lit.literal_index(()) == 9
    assert lit.encoded_tuple_values() == [
        '3',  # Number of tuples
        '2', '5', '4',  # First tuple (length=2)
        '4', '6', '3', '0', '7',  # Second tuple (length=4)
        '0',  # Third tuple (length=0)
    ]