Skip to content

Revert "[mypyc] Add LoadLiteral and use tables to construct and store literals (#10040)" #10113

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions mypyc/analysis/dataflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from mypyc.ir.ops import (
Value, ControlOp,
BasicBlock, OpVisitor, Assign, Integer, LoadErrorValue, RegisterOp, Goto, Branch, Return, Call,
Box, Unbox, Cast, Op, Unreachable, TupleGet, TupleSet, GetAttr, SetAttr, LoadLiteral,
Box, Unbox, Cast, Op, Unreachable, TupleGet, TupleSet, GetAttr, SetAttr,
LoadStatic, InitStatic, MethodCall, RaiseStandardError, CallC, LoadGlobal,
Truncate, IntOp, LoadMem, GetElementPtr, LoadAddress, ComparisonOp, SetMem
)
Expand Down Expand Up @@ -165,9 +165,6 @@ def visit_method_call(self, op: MethodCall) -> GenAndKill:
def visit_load_error_value(self, op: LoadErrorValue) -> GenAndKill:
return self.visit_register_op(op)

def visit_load_literal(self, op: LoadLiteral) -> GenAndKill:
return self.visit_register_op(op)

def visit_get_attr(self, op: GetAttr) -> GenAndKill:
return self.visit_register_op(op)

Expand Down
42 changes: 9 additions & 33 deletions mypyc/codegen/cstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,10 @@
octal digits.
"""

from typing import List
import string
from typing import Tuple

from typing_extensions import Final


CHAR_MAP = ['\\{:03o}'.format(i) for i in range(256)] # type: Final
CHAR_MAP = ['\\{:03o}'.format(i) for i in range(256)]

# It is safe to use string.printable as it always uses the C locale.
for c in string.printable:
Expand All @@ -41,33 +38,12 @@
CHAR_MAP[ord('?')] = r'\?'


def encode_bytes_as_c_string(b: bytes) -> str:
    """Return the escaped body of a C string literal for a byte string.

    Every byte of ``b`` is replaced by its entry in CHAR_MAP and the pieces
    are concatenated. No surrounding quotes are added.
    """
    return ''.join(CHAR_MAP[byte] for byte in b)

def encode_as_c_string(s: str) -> Tuple[str, int]:
    """Encode a str as UTF-8 and convert the bytes to a C string literal.

    The result is whatever encode_bytes_as_c_string() produces for the
    UTF-8 encoding of ``s`` (annotated as the literal text and its size).
    """
    utf8_data = s.encode('utf-8')
    return encode_bytes_as_c_string(utf8_data)

def c_string_initializer(components: List[bytes]) -> str:
    """Create initializer for a C char[] variable from a list of fragments.

    For example, if components is [b'foo', b'bar'], the result would be
    '"foobar"', which could then be used like this to initialize 's':

        const char s[] = "foobar";

    If the result is long, split it into multiple lines.

    If there is nothing to encode (no components, or only empty ones), the
    result is the empty C string literal '""' rather than an empty Python
    string, so that it can still be used as an initializer.
    """
    res = []
    current = ''
    for c in components:
        enc = encode_bytes_as_c_string(c)
        # Keep appending to the current line until it would get too long.
        if not current or len(current) + len(enc) < 70:
            current += enc
        else:
            res.append('"%s"' % current)
            current = enc
    # Flush the pending line. Also emit "" when nothing was produced at all,
    # since an empty result would not be a valid C initializer.
    if current or not res:
        res.append('"%s"' % current)
    if len(res) > 1:
        # Multi-line result: start the literal on a fresh line.
        res.insert(0, '')
    return '\n '.join(res)
def encode_bytes_as_c_string(b: bytes) -> Tuple[str, int]:
    """Return a double-quoted C string literal for ``b`` and its byte count.

    Every byte is replaced by its entry in CHAR_MAP; the second item of the
    result is ``len(b)``, i.e. the size before escaping.
    """
    body = ''.join(CHAR_MAP[byte] for byte in b)
    quoted = '"{}"'.format(body)
    return quoted, len(b)
3 changes: 0 additions & 3 deletions mypyc/codegen/emit.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from mypyc.ir.class_ir import ClassIR, all_concrete_classes
from mypyc.namegen import NameGenerator, exported_name
from mypyc.sametype import is_same_type
from mypyc.codegen.literals import Literals


class HeaderDeclaration:
Expand Down Expand Up @@ -85,8 +84,6 @@ def __init__(self,
# The declaration contains the body of the struct.
self.declarations = OrderedDict() # type: Dict[str, HeaderDeclaration]

self.literals = Literals()


class Emitter:
"""Helper for C code generation."""
Expand Down
18 changes: 2 additions & 16 deletions mypyc/codegen/emitfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
LoadStatic, InitStatic, TupleGet, TupleSet, Call, IncRef, DecRef, Box, Cast, Unbox,
BasicBlock, Value, MethodCall, Unreachable, NAMESPACE_STATIC, NAMESPACE_TYPE, NAMESPACE_MODULE,
RaiseStandardError, CallC, LoadGlobal, Truncate, IntOp, LoadMem, GetElementPtr,
LoadAddress, ComparisonOp, SetMem, Register, LoadLiteral
LoadAddress, ComparisonOp, SetMem, Register
)
from mypyc.ir.rtypes import (
RType, RTuple, is_tagged, is_int32_rprimitive, is_int64_rprimitive, RStruct,
is_pointer_rprimitive, is_int_rprimitive
is_pointer_rprimitive
)
from mypyc.ir.func_ir import FuncIR, FuncDecl, FUNC_STATICMETHOD, FUNC_CLASSMETHOD, all_values
from mypyc.ir.class_ir import ClassIR
Expand Down Expand Up @@ -96,7 +96,6 @@ def __init__(self,
self.declarations = declarations
self.source_path = source_path
self.module_name = module_name
self.literals = emitter.context.literals

def temp_name(self) -> str:
return self.emitter.temp_name()
Expand Down Expand Up @@ -174,19 +173,6 @@ def visit_load_error_value(self, op: LoadErrorValue) -> None:
self.emit_line('%s = %s;' % (self.reg(op),
self.c_error_value(op.type)))

def visit_load_literal(self, op: LoadLiteral) -> None:
    """Emit a C assignment that loads a literal from the CPyStatics array.

    The array index comes from self.literals. The literal's repr() is
    appended as a trailing C comment unless it contains '/*', '*/' or a NUL
    character. For int-typed ops the array entry is cast to CPyTagged and
    OR-ed with 1; otherwise it is assigned as-is.
    """
    index = self.literals.literal_index(op.value)
    text = repr(op.value)
    # Only annotate when the text cannot break out of a C block comment.
    unsafe = '/*' in text or '*/' in text or '\0' in text
    comment = '' if unsafe else ' /* %s */' % text
    if is_int_rprimitive(op.type):
        template = '%s = (CPyTagged)CPyStatics[%d] | 1;%s'
    else:
        template = '%s = CPyStatics[%d];%s'
    self.emit_line(template % (self.reg(op), index, comment))

def get_attr_expr(self, obj: str, op: Union[GetAttr, SetAttr], decl_cl: ClassIR) -> str:
"""Generate attribute accessor for normal (non-property) access.

Expand Down
148 changes: 56 additions & 92 deletions mypyc/codegen/emitmodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,18 @@
from mypyc.irbuild.prepare import load_type_map
from mypyc.irbuild.mapper import Mapper
from mypyc.common import (
PREFIX, TOP_LEVEL_NAME, MODULE_PREFIX, RUNTIME_C_FILES, USE_FASTCALL,
PREFIX, TOP_LEVEL_NAME, INT_PREFIX, MODULE_PREFIX, RUNTIME_C_FILES, USE_FASTCALL,
USE_VECTORCALL, shared_lib_name,
)
from mypyc.codegen.cstring import c_string_initializer
from mypyc.codegen.literals import Literals
from mypyc.codegen.cstring import encode_as_c_string, encode_bytes_as_c_string
from mypyc.codegen.emit import EmitterContext, Emitter, HeaderDeclaration
from mypyc.codegen.emitfunc import generate_native_function, native_function_header
from mypyc.codegen.emitclass import generate_class_type_decl, generate_class
from mypyc.codegen.emitwrapper import (
generate_wrapper_function, wrapper_function_header,
generate_legacy_wrapper_function, legacy_wrapper_function_header,
)
from mypyc.ir.ops import DeserMaps, LoadLiteral
from mypyc.ir.ops import LiteralsMap, DeserMaps
from mypyc.ir.rtypes import RType, RTuple
from mypyc.ir.func_ir import FuncIR
from mypyc.ir.class_ir import ClassIR
Expand Down Expand Up @@ -287,8 +286,9 @@ def compile_ir_to_c(
if not group_modules:
ctext[group_name] = []
continue
literals = mapper.literals[group_name]
generator = GroupGenerator(
group_modules, source_paths,
literals, group_modules, source_paths,
group_name, mapper.group_map, names,
compiler_options
)
Expand Down Expand Up @@ -447,6 +447,7 @@ def group_dir(group_name: str) -> str:

class GroupGenerator:
def __init__(self,
literals: LiteralsMap,
modules: List[Tuple[str, ModuleIR]],
source_paths: Dict[str, str],
group_name: Optional[str],
Expand All @@ -460,6 +461,7 @@ def __init__(self,
one .c file per module if in multi_file mode.)

Arguments:
literals: The literals declared in this group
modules: (name, ir) pairs for each module in the group
source_paths: Map from module names to source file paths
group_name: The name of the group (or None if this is single-module compilation)
Expand All @@ -468,6 +470,7 @@ def __init__(self,
multi_file: Whether to put each module in its own source file regardless
of group structure.
"""
self.literals = literals
self.modules = modules
self.source_paths = source_paths
self.context = EmitterContext(names, group_name, group_map)
Expand All @@ -492,11 +495,6 @@ def generate_c_for_modules(self) -> List[Tuple[str, str]]:
file_contents = []
multi_file = self.use_shared_lib and self.multi_file

# Collect all literal refs in IR.
for _, module in self.modules:
for fn in module.functions:
collect_literals(fn, self.context.literals)

base_emitter = Emitter(self.context)
# Optionally just include the runtime library c files to
# reduce the number of compiler invocations needed
Expand All @@ -507,7 +505,12 @@ def generate_c_for_modules(self) -> List[Tuple[str, str]]:
base_emitter.emit_line('#include "__native_internal{}.h"'.format(self.short_group_suffix))
emitter = base_emitter

self.generate_literal_tables()
for (_, literal), identifier in self.literals.items():
if isinstance(literal, int):
symbol = emitter.static_name(identifier, None)
self.declare_global('CPyTagged ', symbol)
else:
self.declare_static_pyobject(identifier, emitter)

for module_name, module in self.modules:
if multi_file:
Expand Down Expand Up @@ -618,32 +621,6 @@ def generate_c_for_modules(self) -> List[Tuple[str, str]]:
''.join(ext_declarations.fragments)),
]

def generate_literal_tables(self) -> None:
    """Declare the globals that describe and hold Python literals.

    One 'CPyStatics' array of PyObject pointers, sized by the number of
    recorded literals, is declared for the constructed objects, so that any
    literal can be referred to by index. It is followed by one description
    table per literal kind (str, bytes, int, float, complex), each
    initialized from the encoded values provided by self.context.literals.
    """
    literals = self.context.literals
    # During module initialization all the constructed objects are stored here.
    self.declare_global('PyObject *[%d]' % literals.num_literals(), 'CPyStatics')

    # (C type, global name, initializer builder, source of encoded values)
    tables = (
        ('const char []', 'CPyLit_Str',
         c_string_initializer, literals.encoded_str_values),
        ('const char []', 'CPyLit_Bytes',
         c_string_initializer, literals.encoded_bytes_values),
        ('const char []', 'CPyLit_Int',
         c_string_initializer, literals.encoded_int_values),
        ('const double []', 'CPyLit_Float',
         c_array_initializer, literals.encoded_float_values),
        ('const double []', 'CPyLit_Complex',
         c_array_initializer, literals.encoded_complex_values),
    )
    for c_type, name, make_initializer, get_values in tables:
        self.declare_global(c_type, name, initializer=make_initializer(get_values()))

def generate_export_table(self, decl_emitter: Emitter, code_emitter: Emitter) -> None:
"""Generate the declaration and definition of the group's export struct.

Expand Down Expand Up @@ -816,10 +793,46 @@ def generate_globals_init(self, emitter: Emitter) -> None:
for symbol, fixup in self.simple_inits:
emitter.emit_line('{} = {};'.format(symbol, fixup))

values = 'CPyLit_Str, CPyLit_Bytes, CPyLit_Int, CPyLit_Float, CPyLit_Complex'
emitter.emit_lines('if (CPyStatics_Initialize(CPyStatics, {}) < 0) {{'.format(values),
'return -1;',
'}')
for (_, literal), identifier in self.literals.items():
symbol = emitter.static_name(identifier, None)
if isinstance(literal, int):
actual_symbol = symbol
symbol = INT_PREFIX + symbol
emitter.emit_line(
'PyObject * {} = PyLong_FromString(\"{}\", NULL, 10);'.format(
symbol, str(literal))
)
elif isinstance(literal, float):
emitter.emit_line(
'{} = PyFloat_FromDouble({});'.format(symbol, str(literal))
)
elif isinstance(literal, complex):
emitter.emit_line(
'{} = PyComplex_FromDoubles({}, {});'.format(
symbol, str(literal.real), str(literal.imag))
)
elif isinstance(literal, str):
emitter.emit_line(
'{} = PyUnicode_FromStringAndSize({}, {});'.format(
symbol, *encode_as_c_string(literal))
)
elif isinstance(literal, bytes):
emitter.emit_line(
'{} = PyBytes_FromStringAndSize({}, {});'.format(
symbol, *encode_bytes_as_c_string(literal))
)
else:
assert False, ('Literals must be integers, floating point numbers, or strings,',
'but the provided literal is of type {}'.format(type(literal)))
emitter.emit_lines('if (unlikely({} == NULL))'.format(symbol),
' return -1;')
# Ints have an unboxed representation.
if isinstance(literal, int):
emitter.emit_line(
'{} = CPyTagged_FromObject({});'.format(actual_symbol, symbol)
)
elif isinstance(literal, str):
emitter.emit_line('PyUnicode_InternInPlace(&{});'.format(symbol))

emitter.emit_lines(
'is_initialized = 1;',
Expand Down Expand Up @@ -961,19 +974,13 @@ def _toposort_visit(name: str) -> None:
def declare_global(self, type_spaced: str, name: str,
*,
initializer: Optional[str] = None) -> None:
if '[' not in type_spaced:
base = '{}{}'.format(type_spaced, name)
else:
a, b = type_spaced.split('[', 1)
base = '{}{}[{}'.format(a, name, b)

if not initializer:
defn = None
else:
defn = ['{} = {};'.format(base, initializer)]
defn = ['{}{} = {};'.format(type_spaced, name, initializer)]
if name not in self.context.declarations:
self.context.declarations[name] = HeaderDeclaration(
'{};'.format(base),
'{}{};'.format(type_spaced, name),
defn=defn,
)

Expand Down Expand Up @@ -1073,46 +1080,3 @@ def is_fastcall_supported(fn: FuncIR) -> bool:
# TODO: Support fastcall for __init__.
return USE_FASTCALL and fn.name != '__init__'
return USE_FASTCALL


def collect_literals(fn: FuncIR, literals: Literals) -> None:
    """Record the value of every LoadLiteral op in fn into literals.

    Collecting literals must happen only after we have the final IR.
    This way we won't include literals that have been optimized away.
    """
    load_ops = (
        op
        for block in fn.blocks
        for op in block.ops
        if isinstance(op, LoadLiteral)
    )
    for op in load_ops:
        literals.record_literal(op.value)


def c_array_initializer(components: List[str]) -> str:
"""Construct an initializer for a C array variable.

Components are C expressions valid in an initializer.

For example, if components are ["1", "2"], the result
would be "{1, 2}", which can be used like this:

int a[] = {1, 2};

If the result is long, split it into multiple lines.
"""
res = []
current = [] # type: List[str]
cur_len = 0
for c in components:
if not current or cur_len + 2 + len(c) < 70:
current.append(c)
cur_len += len(c) + 2
else:
res.append(', '.join(current))
current = [c]
cur_len = len(c)
if not res:
# Result fits on a single line
return '{%s}' % ', '.join(current)
# Multi-line result
res.append(', '.join(current))
return '{\n ' + ',\n '.join(res) + '\n}'
Loading