23
23
from mypyc .irbuild .prepare import load_type_map
24
24
from mypyc .irbuild .mapper import Mapper
25
25
from mypyc .common import (
26
- PREFIX , TOP_LEVEL_NAME , INT_PREFIX , MODULE_PREFIX , RUNTIME_C_FILES , USE_FASTCALL ,
26
+ PREFIX , TOP_LEVEL_NAME , MODULE_PREFIX , RUNTIME_C_FILES , USE_FASTCALL ,
27
27
USE_VECTORCALL , shared_lib_name ,
28
28
)
29
- from mypyc .codegen .cstring import encode_as_c_string , encode_bytes_as_c_string
29
+ from mypyc .codegen .cstring import c_string_initializer
30
+ from mypyc .codegen .literals import Literals
30
31
from mypyc .codegen .emit import EmitterContext , Emitter , HeaderDeclaration
31
32
from mypyc .codegen .emitfunc import generate_native_function , native_function_header
32
33
from mypyc .codegen .emitclass import generate_class_type_decl , generate_class
33
34
from mypyc .codegen .emitwrapper import (
34
35
generate_wrapper_function , wrapper_function_header ,
35
36
generate_legacy_wrapper_function , legacy_wrapper_function_header ,
36
37
)
37
- from mypyc .ir .ops import LiteralsMap , DeserMaps
38
+ from mypyc .ir .ops import DeserMaps , LoadLiteral
38
39
from mypyc .ir .rtypes import RType , RTuple
39
40
from mypyc .ir .func_ir import FuncIR
40
41
from mypyc .ir .class_ir import ClassIR
@@ -286,9 +287,8 @@ def compile_ir_to_c(
286
287
if not group_modules :
287
288
ctext [group_name ] = []
288
289
continue
289
- literals = mapper .literals [group_name ]
290
290
generator = GroupGenerator (
291
- literals , group_modules , source_paths ,
291
+ group_modules , source_paths ,
292
292
group_name , mapper .group_map , names ,
293
293
compiler_options
294
294
)
@@ -447,7 +447,6 @@ def group_dir(group_name: str) -> str:
447
447
448
448
class GroupGenerator :
449
449
def __init__ (self ,
450
- literals : LiteralsMap ,
451
450
modules : List [Tuple [str , ModuleIR ]],
452
451
source_paths : Dict [str , str ],
453
452
group_name : Optional [str ],
@@ -461,7 +460,6 @@ def __init__(self,
461
460
one .c file per module if in multi_file mode.)
462
461
463
462
Arguments:
464
- literals: The literals declared in this group
465
463
modules: (name, ir) pairs for each module in the group
466
464
source_paths: Map from module names to source file paths
467
465
group_name: The name of the group (or None if this is single-module compilation)
@@ -470,7 +468,6 @@ def __init__(self,
470
468
multi_file: Whether to put each module in its own source file regardless
471
469
of group structure.
472
470
"""
473
- self .literals = literals
474
471
self .modules = modules
475
472
self .source_paths = source_paths
476
473
self .context = EmitterContext (names , group_name , group_map )
@@ -495,6 +492,11 @@ def generate_c_for_modules(self) -> List[Tuple[str, str]]:
495
492
file_contents = []
496
493
multi_file = self .use_shared_lib and self .multi_file
497
494
495
+ # Collect all literal refs in IR.
496
+ for _ , module in self .modules :
497
+ for fn in module .functions :
498
+ collect_literals (fn , self .context .literals )
499
+
498
500
base_emitter = Emitter (self .context )
499
501
# Optionally just include the runtime library c files to
500
502
# reduce the number of compiler invocations needed
@@ -505,12 +507,7 @@ def generate_c_for_modules(self) -> List[Tuple[str, str]]:
505
507
base_emitter .emit_line ('#include "__native_internal{}.h"' .format (self .short_group_suffix ))
506
508
emitter = base_emitter
507
509
508
- for (_ , literal ), identifier in self .literals .items ():
509
- if isinstance (literal , int ):
510
- symbol = emitter .static_name (identifier , None )
511
- self .declare_global ('CPyTagged ' , symbol )
512
- else :
513
- self .declare_static_pyobject (identifier , emitter )
510
+ self .generate_literal_tables ()
514
511
515
512
for module_name , module in self .modules :
516
513
if multi_file :
@@ -621,6 +618,32 @@ def generate_c_for_modules(self) -> List[Tuple[str, str]]:
621
618
'' .join (ext_declarations .fragments )),
622
619
]
623
620
621
def generate_literal_tables(self) -> None:
    """Declare the C globals that describe and hold the Python literals.

    All constructed literals, whatever their type, live in one shared
    array ('CPyStatics'), so an arbitrary literal can be referred to by
    its index. The remaining globals are per-type tables that describe
    the literal values to construct.
    """
    literals = self.context.literals

    # Storage for the constructed objects, filled in during module initialization.
    self.declare_global('PyObject *[%d]' % literals.num_literals(), 'CPyStatics')

    # Descriptions of str, bytes and int literals are emitted as arrays of C strings.
    for table_name, encode in (
            ('CPyLit_Str', literals.encoded_str_values),
            ('CPyLit_Bytes', literals.encoded_bytes_values),
            ('CPyLit_Int', literals.encoded_int_values)):
        self.declare_global('const char * const []', table_name,
                            initializer=c_string_array_initializer(encode()))

    # Descriptions of float and complex literals are emitted as arrays of doubles.
    for table_name, encode in (
            ('CPyLit_Float', literals.encoded_float_values),
            ('CPyLit_Complex', literals.encoded_complex_values)):
        self.declare_global('const double []', table_name,
                            initializer=c_array_initializer(encode()))
646
+
624
647
def generate_export_table (self , decl_emitter : Emitter , code_emitter : Emitter ) -> None :
625
648
"""Generate the declaration and definition of the group's export struct.
626
649
@@ -793,46 +816,10 @@ def generate_globals_init(self, emitter: Emitter) -> None:
793
816
for symbol , fixup in self .simple_inits :
794
817
emitter .emit_line ('{} = {};' .format (symbol , fixup ))
795
818
796
- for (_ , literal ), identifier in self .literals .items ():
797
- symbol = emitter .static_name (identifier , None )
798
- if isinstance (literal , int ):
799
- actual_symbol = symbol
800
- symbol = INT_PREFIX + symbol
801
- emitter .emit_line (
802
- 'PyObject * {} = PyLong_FromString(\" {}\" , NULL, 10);' .format (
803
- symbol , str (literal ))
804
- )
805
- elif isinstance (literal , float ):
806
- emitter .emit_line (
807
- '{} = PyFloat_FromDouble({});' .format (symbol , str (literal ))
808
- )
809
- elif isinstance (literal , complex ):
810
- emitter .emit_line (
811
- '{} = PyComplex_FromDoubles({}, {});' .format (
812
- symbol , str (literal .real ), str (literal .imag ))
813
- )
814
- elif isinstance (literal , str ):
815
- emitter .emit_line (
816
- '{} = PyUnicode_FromStringAndSize({}, {});' .format (
817
- symbol , * encode_as_c_string (literal ))
818
- )
819
- elif isinstance (literal , bytes ):
820
- emitter .emit_line (
821
- '{} = PyBytes_FromStringAndSize({}, {});' .format (
822
- symbol , * encode_bytes_as_c_string (literal ))
823
- )
824
- else :
825
- assert False , ('Literals must be integers, floating point numbers, or strings,' ,
826
- 'but the provided literal is of type {}' .format (type (literal )))
827
- emitter .emit_lines ('if (unlikely({} == NULL))' .format (symbol ),
828
- ' return -1;' )
829
- # Ints have an unboxed representation.
830
- if isinstance (literal , int ):
831
- emitter .emit_line (
832
- '{} = CPyTagged_FromObject({});' .format (actual_symbol , symbol )
833
- )
834
- elif isinstance (literal , str ):
835
- emitter .emit_line ('PyUnicode_InternInPlace(&{});' .format (symbol ))
819
+ values = 'CPyLit_Str, CPyLit_Bytes, CPyLit_Int, CPyLit_Float, CPyLit_Complex'
820
+ emitter .emit_lines ('if (CPyStatics_Initialize(CPyStatics, {}) < 0) {{' .format (values ),
821
+ 'return -1;' ,
822
+ '}' )
836
823
837
824
emitter .emit_lines (
838
825
'is_initialized = 1;' ,
@@ -974,13 +961,19 @@ def _toposort_visit(name: str) -> None:
974
961
def declare_global(self, type_spaced: str, name: str,
                   *,
                   initializer: Optional[str] = None) -> None:
    """Register a declaration for a C global unless 'name' is already declared.

    Arguments:
        type_spaced: C type text placed directly in front of the name. If it
            contains '[', the name is inserted before the first '[' so that
            array types come out as 'T name[...]'.
        name: Name of the global; also the key in self.context.declarations.
        initializer: Optional C initializer. When given (and non-empty), a
            definition of the form '<decl> = <initializer>;' is attached.
    """
    # partition() yields (type_spaced, '', '') when there is no '[', so a
    # single expression covers both the scalar and the array case.
    head, bracket, tail = type_spaced.partition('[')
    base = head + name + bracket + tail

    if name in self.context.declarations:
        # An earlier declaration wins; leave it untouched.
        return

    defn = ['{} = {};'.format(base, initializer)] if initializer else None
    self.context.declarations[name] = HeaderDeclaration(
        '{};'.format(base),
        defn=defn,
    )
986
979
@@ -1080,3 +1073,55 @@ def is_fastcall_supported(fn: FuncIR) -> bool:
1080
1073
# TODO: Support fastcall for __init__.
1081
1074
return USE_FASTCALL and fn .name != '__init__'
1082
1075
return USE_FASTCALL
1076
+
1077
+
1078
def collect_literals(fn: FuncIR, literals: Literals) -> None:
    """Record every Python literal object referenced by the ops of fn.

    Only call this once the IR is final: literals are gathered from the
    ops that are actually present, so anything that has been optimized
    away is not included.
    """
    every_op = (op for block in fn.blocks for op in block.ops)
    for op in every_op:
        if not isinstance(op, LoadLiteral):
            continue
        literals.record_literal(op.value)
1088
+
1089
+
1090
def c_array_initializer(components: List[str]) -> str:
    """Build the initializer text for a C array variable.

    Each component must be a C expression that is valid inside an
    initializer. For instance, components ["1", "2"] produce "{1, 2}",
    usable as:

        int a[] = {1, 2};

    A short result stays on one line; a long one is broken into several
    lines, each holding as many components as fit.
    """
    rows = []
    pending = []  # type: List[str]
    width = 0
    for item in components:
        if pending and width + 2 + len(item) >= 70:
            # The current row is full: flush it and start a new one with item.
            rows.append(', '.join(pending))
            pending = [item]
            width = len(item)
        else:
            # An empty row always accepts an item, however long it is.
            pending.append(item)
            width += len(item) + 2

    last_row = ', '.join(pending)
    if not rows:
        # Everything fit on a single line.
        return '{%s}' % last_row
    rows.append(last_row)
    # NOTE(review): whitespace inside these literals is copied as found; confirm
    # the intended indentation against the upstream file.
    return '{\n ' + ',\n '.join(rows) + '\n }'
1119
+
1120
+
1121
def c_string_array_initializer(components: List[bytes]) -> str:
    """Build the initializer text for a C array of strings.

    Every component is rendered with c_string_initializer and placed on
    its own line, followed by a comma, between an opening and a closing
    brace.
    """
    # NOTE(review): whitespace inside these literals is copied as found; confirm
    # the intended indentation against the upstream file.
    entries = [' ' + c_string_initializer(s) + ',\n' for s in components]
    return ''.join(['{\n '] + entries + ['}'])
+ return '' .join (result )
0 commit comments