Skip to content

Commit b07c564

Browse files
authored
Revert "[mypyc] Add LoadLiteral and use tables to construct and store literals (#10040)" (#10113)
This reverts commit 7ec1455. Looks like this broke Windows wheel builds somehow.
1 parent 9da0ce1 commit b07c564

33 files changed: +407 / -835 lines changed

mypyc/analysis/dataflow.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from mypyc.ir.ops import (
88
Value, ControlOp,
99
BasicBlock, OpVisitor, Assign, Integer, LoadErrorValue, RegisterOp, Goto, Branch, Return, Call,
10-
Box, Unbox, Cast, Op, Unreachable, TupleGet, TupleSet, GetAttr, SetAttr, LoadLiteral,
10+
Box, Unbox, Cast, Op, Unreachable, TupleGet, TupleSet, GetAttr, SetAttr,
1111
LoadStatic, InitStatic, MethodCall, RaiseStandardError, CallC, LoadGlobal,
1212
Truncate, IntOp, LoadMem, GetElementPtr, LoadAddress, ComparisonOp, SetMem
1313
)
@@ -165,9 +165,6 @@ def visit_method_call(self, op: MethodCall) -> GenAndKill:
165165
def visit_load_error_value(self, op: LoadErrorValue) -> GenAndKill:
166166
return self.visit_register_op(op)
167167

168-
def visit_load_literal(self, op: LoadLiteral) -> GenAndKill:
169-
return self.visit_register_op(op)
170-
171168
def visit_get_attr(self, op: GetAttr) -> GenAndKill:
172169
return self.visit_register_op(op)
173170

mypyc/codegen/cstring.py

Lines changed: 9 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,10 @@
1818
octal digits.
1919
"""
2020

21-
from typing import List
2221
import string
22+
from typing import Tuple
2323

24-
from typing_extensions import Final
25-
26-
27-
CHAR_MAP = ['\\{:03o}'.format(i) for i in range(256)] # type: Final
24+
CHAR_MAP = ['\\{:03o}'.format(i) for i in range(256)]
2825

2926
# It is safe to use string.printable as it always uses the C locale.
3027
for c in string.printable:
@@ -41,33 +38,12 @@
4138
CHAR_MAP[ord('?')] = r'\?'
4239

4340

44-
def encode_bytes_as_c_string(b: bytes) -> str:
45-
"""Produce contents of a C string literal for a byte string, without quotes."""
46-
escaped = ''.join([CHAR_MAP[i] for i in b])
47-
return escaped
48-
41+
def encode_as_c_string(s: str) -> Tuple[str, int]:
42+
"""Produce a quoted C string literal and its size, for a UTF-8 string."""
43+
return encode_bytes_as_c_string(s.encode('utf-8'))
4944

50-
def c_string_initializer(components: List[bytes]) -> str:
51-
"""Create initializer for a C char[] variable from a list of fragments.
5245

53-
For example, if components is [b'foo', b'bar'], the result would be
54-
'"foobar"', which could then be used like this to initialize 's':
55-
56-
const char s[] = "foobar";
57-
58-
If the result is long, split it into multiple lines.
59-
"""
60-
res = []
61-
current = ''
62-
for c in components:
63-
enc = encode_bytes_as_c_string(c)
64-
if not current or len(current) + len(enc) < 70:
65-
current += enc
66-
else:
67-
res.append('"%s"' % current)
68-
current = enc
69-
if current:
70-
res.append('"%s"' % current)
71-
if len(res) > 1:
72-
res.insert(0, '')
73-
return '\n '.join(res)
46+
def encode_bytes_as_c_string(b: bytes) -> Tuple[str, int]:
47+
"""Produce a quoted C string literal and its size, for a byte string."""
48+
escaped = ''.join([CHAR_MAP[i] for i in b])
49+
return '"{}"'.format(escaped), len(b)

mypyc/codegen/emit.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
from mypyc.ir.class_ir import ClassIR, all_concrete_classes
2121
from mypyc.namegen import NameGenerator, exported_name
2222
from mypyc.sametype import is_same_type
23-
from mypyc.codegen.literals import Literals
2423

2524

2625
class HeaderDeclaration:
@@ -85,8 +84,6 @@ def __init__(self,
8584
# The declaration contains the body of the struct.
8685
self.declarations = OrderedDict() # type: Dict[str, HeaderDeclaration]
8786

88-
self.literals = Literals()
89-
9087

9188
class Emitter:
9289
"""Helper for C code generation."""

mypyc/codegen/emitfunc.py

Lines changed: 2 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,11 @@
1212
LoadStatic, InitStatic, TupleGet, TupleSet, Call, IncRef, DecRef, Box, Cast, Unbox,
1313
BasicBlock, Value, MethodCall, Unreachable, NAMESPACE_STATIC, NAMESPACE_TYPE, NAMESPACE_MODULE,
1414
RaiseStandardError, CallC, LoadGlobal, Truncate, IntOp, LoadMem, GetElementPtr,
15-
LoadAddress, ComparisonOp, SetMem, Register, LoadLiteral
15+
LoadAddress, ComparisonOp, SetMem, Register
1616
)
1717
from mypyc.ir.rtypes import (
1818
RType, RTuple, is_tagged, is_int32_rprimitive, is_int64_rprimitive, RStruct,
19-
is_pointer_rprimitive, is_int_rprimitive
19+
is_pointer_rprimitive
2020
)
2121
from mypyc.ir.func_ir import FuncIR, FuncDecl, FUNC_STATICMETHOD, FUNC_CLASSMETHOD, all_values
2222
from mypyc.ir.class_ir import ClassIR
@@ -96,7 +96,6 @@ def __init__(self,
9696
self.declarations = declarations
9797
self.source_path = source_path
9898
self.module_name = module_name
99-
self.literals = emitter.context.literals
10099

101100
def temp_name(self) -> str:
102101
return self.emitter.temp_name()
@@ -174,19 +173,6 @@ def visit_load_error_value(self, op: LoadErrorValue) -> None:
174173
self.emit_line('%s = %s;' % (self.reg(op),
175174
self.c_error_value(op.type)))
176175

177-
def visit_load_literal(self, op: LoadLiteral) -> None:
178-
index = self.literals.literal_index(op.value)
179-
s = repr(op.value)
180-
if not any(x in s for x in ('/*', '*/', '\0')):
181-
ann = ' /* %s */' % s
182-
else:
183-
ann = ''
184-
if not is_int_rprimitive(op.type):
185-
self.emit_line('%s = CPyStatics[%d];%s' % (self.reg(op), index, ann))
186-
else:
187-
self.emit_line('%s = (CPyTagged)CPyStatics[%d] | 1;%s' % (
188-
self.reg(op), index, ann))
189-
190176
def get_attr_expr(self, obj: str, op: Union[GetAttr, SetAttr], decl_cl: ClassIR) -> str:
191177
"""Generate attribute accessor for normal (non-property) access.
192178

mypyc/codegen/emitmodule.py

Lines changed: 56 additions & 92 deletions
Original file line number | Diff line number | Diff line change
@@ -23,19 +23,18 @@
2323
from mypyc.irbuild.prepare import load_type_map
2424
from mypyc.irbuild.mapper import Mapper
2525
from mypyc.common import (
26-
PREFIX, TOP_LEVEL_NAME, MODULE_PREFIX, RUNTIME_C_FILES, USE_FASTCALL,
26+
PREFIX, TOP_LEVEL_NAME, INT_PREFIX, MODULE_PREFIX, RUNTIME_C_FILES, USE_FASTCALL,
2727
USE_VECTORCALL, shared_lib_name,
2828
)
29-
from mypyc.codegen.cstring import c_string_initializer
30-
from mypyc.codegen.literals import Literals
29+
from mypyc.codegen.cstring import encode_as_c_string, encode_bytes_as_c_string
3130
from mypyc.codegen.emit import EmitterContext, Emitter, HeaderDeclaration
3231
from mypyc.codegen.emitfunc import generate_native_function, native_function_header
3332
from mypyc.codegen.emitclass import generate_class_type_decl, generate_class
3433
from mypyc.codegen.emitwrapper import (
3534
generate_wrapper_function, wrapper_function_header,
3635
generate_legacy_wrapper_function, legacy_wrapper_function_header,
3736
)
38-
from mypyc.ir.ops import DeserMaps, LoadLiteral
37+
from mypyc.ir.ops import LiteralsMap, DeserMaps
3938
from mypyc.ir.rtypes import RType, RTuple
4039
from mypyc.ir.func_ir import FuncIR
4140
from mypyc.ir.class_ir import ClassIR
@@ -287,8 +286,9 @@ def compile_ir_to_c(
287286
if not group_modules:
288287
ctext[group_name] = []
289288
continue
289+
literals = mapper.literals[group_name]
290290
generator = GroupGenerator(
291-
group_modules, source_paths,
291+
literals, group_modules, source_paths,
292292
group_name, mapper.group_map, names,
293293
compiler_options
294294
)
@@ -447,6 +447,7 @@ def group_dir(group_name: str) -> str:
447447

448448
class GroupGenerator:
449449
def __init__(self,
450+
literals: LiteralsMap,
450451
modules: List[Tuple[str, ModuleIR]],
451452
source_paths: Dict[str, str],
452453
group_name: Optional[str],
@@ -460,6 +461,7 @@ def __init__(self,
460461
one .c file per module if in multi_file mode.)
461462
462463
Arguments:
464+
literals: The literals declared in this group
463465
modules: (name, ir) pairs for each module in the group
464466
source_paths: Map from module names to source file paths
465467
group_name: The name of the group (or None if this is single-module compilation)
@@ -468,6 +470,7 @@ def __init__(self,
468470
multi_file: Whether to put each module in its own source file regardless
469471
of group structure.
470472
"""
473+
self.literals = literals
471474
self.modules = modules
472475
self.source_paths = source_paths
473476
self.context = EmitterContext(names, group_name, group_map)
@@ -492,11 +495,6 @@ def generate_c_for_modules(self) -> List[Tuple[str, str]]:
492495
file_contents = []
493496
multi_file = self.use_shared_lib and self.multi_file
494497

495-
# Collect all literal refs in IR.
496-
for _, module in self.modules:
497-
for fn in module.functions:
498-
collect_literals(fn, self.context.literals)
499-
500498
base_emitter = Emitter(self.context)
501499
# Optionally just include the runtime library c files to
502500
# reduce the number of compiler invocations needed
@@ -507,7 +505,12 @@ def generate_c_for_modules(self) -> List[Tuple[str, str]]:
507505
base_emitter.emit_line('#include "__native_internal{}.h"'.format(self.short_group_suffix))
508506
emitter = base_emitter
509507

510-
self.generate_literal_tables()
508+
for (_, literal), identifier in self.literals.items():
509+
if isinstance(literal, int):
510+
symbol = emitter.static_name(identifier, None)
511+
self.declare_global('CPyTagged ', symbol)
512+
else:
513+
self.declare_static_pyobject(identifier, emitter)
511514

512515
for module_name, module in self.modules:
513516
if multi_file:
@@ -618,32 +621,6 @@ def generate_c_for_modules(self) -> List[Tuple[str, str]]:
618621
''.join(ext_declarations.fragments)),
619622
]
620623

621-
def generate_literal_tables(self) -> None:
622-
"""Generate tables containing descriptions of Python literals to construct.
623-
624-
We will store the constructed literals in a single array that contains
625-
literals of all types. This way we can refer to an arbitrary literal by
626-
its index.
627-
"""
628-
literals = self.context.literals
629-
# During module initialization we store all the constructed objects here
630-
self.declare_global('PyObject *[%d]' % literals.num_literals(), 'CPyStatics')
631-
# Descriptions of str literals
632-
init_str = c_string_initializer(literals.encoded_str_values())
633-
self.declare_global('const char []', 'CPyLit_Str', initializer=init_str)
634-
# Descriptions of bytes literals
635-
init_bytes = c_string_initializer(literals.encoded_bytes_values())
636-
self.declare_global('const char []', 'CPyLit_Bytes', initializer=init_bytes)
637-
# Descriptions of int literals
638-
init_int = c_string_initializer(literals.encoded_int_values())
639-
self.declare_global('const char []', 'CPyLit_Int', initializer=init_int)
640-
# Descriptions of float literals
641-
init_floats = c_array_initializer(literals.encoded_float_values())
642-
self.declare_global('const double []', 'CPyLit_Float', initializer=init_floats)
643-
# Descriptions of complex literals
644-
init_complex = c_array_initializer(literals.encoded_complex_values())
645-
self.declare_global('const double []', 'CPyLit_Complex', initializer=init_complex)
646-
647624
def generate_export_table(self, decl_emitter: Emitter, code_emitter: Emitter) -> None:
648625
"""Generate the declaration and definition of the group's export struct.
649626
@@ -816,10 +793,46 @@ def generate_globals_init(self, emitter: Emitter) -> None:
816793
for symbol, fixup in self.simple_inits:
817794
emitter.emit_line('{} = {};'.format(symbol, fixup))
818795

819-
values = 'CPyLit_Str, CPyLit_Bytes, CPyLit_Int, CPyLit_Float, CPyLit_Complex'
820-
emitter.emit_lines('if (CPyStatics_Initialize(CPyStatics, {}) < 0) {{'.format(values),
821-
'return -1;',
822-
'}')
796+
for (_, literal), identifier in self.literals.items():
797+
symbol = emitter.static_name(identifier, None)
798+
if isinstance(literal, int):
799+
actual_symbol = symbol
800+
symbol = INT_PREFIX + symbol
801+
emitter.emit_line(
802+
'PyObject * {} = PyLong_FromString(\"{}\", NULL, 10);'.format(
803+
symbol, str(literal))
804+
)
805+
elif isinstance(literal, float):
806+
emitter.emit_line(
807+
'{} = PyFloat_FromDouble({});'.format(symbol, str(literal))
808+
)
809+
elif isinstance(literal, complex):
810+
emitter.emit_line(
811+
'{} = PyComplex_FromDoubles({}, {});'.format(
812+
symbol, str(literal.real), str(literal.imag))
813+
)
814+
elif isinstance(literal, str):
815+
emitter.emit_line(
816+
'{} = PyUnicode_FromStringAndSize({}, {});'.format(
817+
symbol, *encode_as_c_string(literal))
818+
)
819+
elif isinstance(literal, bytes):
820+
emitter.emit_line(
821+
'{} = PyBytes_FromStringAndSize({}, {});'.format(
822+
symbol, *encode_bytes_as_c_string(literal))
823+
)
824+
else:
825+
assert False, ('Literals must be integers, floating point numbers, or strings,',
826+
'but the provided literal is of type {}'.format(type(literal)))
827+
emitter.emit_lines('if (unlikely({} == NULL))'.format(symbol),
828+
' return -1;')
829+
# Ints have an unboxed representation.
830+
if isinstance(literal, int):
831+
emitter.emit_line(
832+
'{} = CPyTagged_FromObject({});'.format(actual_symbol, symbol)
833+
)
834+
elif isinstance(literal, str):
835+
emitter.emit_line('PyUnicode_InternInPlace(&{});'.format(symbol))
823836

824837
emitter.emit_lines(
825838
'is_initialized = 1;',
@@ -961,19 +974,13 @@ def _toposort_visit(name: str) -> None:
961974
def declare_global(self, type_spaced: str, name: str,
962975
*,
963976
initializer: Optional[str] = None) -> None:
964-
if '[' not in type_spaced:
965-
base = '{}{}'.format(type_spaced, name)
966-
else:
967-
a, b = type_spaced.split('[', 1)
968-
base = '{}{}[{}'.format(a, name, b)
969-
970977
if not initializer:
971978
defn = None
972979
else:
973-
defn = ['{} = {};'.format(base, initializer)]
980+
defn = ['{}{} = {};'.format(type_spaced, name, initializer)]
974981
if name not in self.context.declarations:
975982
self.context.declarations[name] = HeaderDeclaration(
976-
'{};'.format(base),
983+
'{}{};'.format(type_spaced, name),
977984
defn=defn,
978985
)
979986

@@ -1073,46 +1080,3 @@ def is_fastcall_supported(fn: FuncIR) -> bool:
10731080
# TODO: Support fastcall for __init__.
10741081
return USE_FASTCALL and fn.name != '__init__'
10751082
return USE_FASTCALL
1076-
1077-
1078-
def collect_literals(fn: FuncIR, literals: Literals) -> None:
1079-
"""Store all Python literal object refs in fn.
1080-
1081-
Collecting literals must happen only after we have the final IR.
1082-
This way we won't include literals that have been optimized away.
1083-
"""
1084-
for block in fn.blocks:
1085-
for op in block.ops:
1086-
if isinstance(op, LoadLiteral):
1087-
literals.record_literal(op.value)
1088-
1089-
1090-
def c_array_initializer(components: List[str]) -> str:
1091-
"""Construct an initializer for a C array variable.
1092-
1093-
Components are C expressions valid in an initializer.
1094-
1095-
For example, if components are ["1", "2"], the result
1096-
would be "{1, 2}", which can be used like this:
1097-
1098-
int a[] = {1, 2};
1099-
1100-
If the result is long, split it into multiple lines.
1101-
"""
1102-
res = []
1103-
current = [] # type: List[str]
1104-
cur_len = 0
1105-
for c in components:
1106-
if not current or cur_len + 2 + len(c) < 70:
1107-
current.append(c)
1108-
cur_len += len(c) + 2
1109-
else:
1110-
res.append(', '.join(current))
1111-
current = [c]
1112-
cur_len = len(c)
1113-
if not res:
1114-
# Result fits on a single line
1115-
return '{%s}' % ', '.join(current)
1116-
# Multi-line result
1117-
res.append(', '.join(current))
1118-
return '{\n ' + ',\n '.join(res) + '\n}'

0 commit comments

Comments
 (0)