Skip to content

Commit 6bb2266

Browse files
[mypyc] Build lists using a primitive op (#10807)
Closes mypyc/mypyc#264

This PR adds a primitive op and a C helper function for building lists.

## Test Plan

This change helps reduce the generated code size, but adds some call overhead.

Microbenchmark tested on master branch:

```
interpreted: 0.000256s (avg of 3582 iterations; stdev 2.2%)
compiled: 0.000063s (avg of 3582 iterations; stdev 2.1%)
compiled is 4.073x faster
```

On this PR:

```
interpreted: 0.000275s (avg of 3331 iterations; stdev 4.4%)
compiled: 0.000083s (avg of 3331 iterations; stdev 4.5%)
compiled is 3.321x faster
```

```
@benchmark
def list_build() -> None:
    n = 0
    for i in range(1000):
        x = ["x", "y", "1", "2", str(i)]
        n += len(x)
    assert n == 5000, n
```
1 parent a1863c9 commit 6bb2266

File tree

6 files changed

+99
-16
lines changed

6 files changed

+99
-16
lines changed

mypyc/irbuild/ll_builder.py

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,16 @@
2424
GetAttr, LoadStatic, MethodCall, CallC, Truncate, LoadLiteral, AssignMulti,
2525
RaiseStandardError, Unreachable, LoadErrorValue,
2626
NAMESPACE_TYPE, NAMESPACE_MODULE, NAMESPACE_STATIC, IntOp, GetElementPtr,
27-
LoadMem, ComparisonOp, LoadAddress, TupleGet, SetMem, KeepAlive, ERR_NEVER, ERR_FALSE
27+
LoadMem, ComparisonOp, LoadAddress, TupleGet, KeepAlive, ERR_NEVER, ERR_FALSE, SetMem
2828
)
2929
from mypyc.ir.rtypes import (
3030
RType, RUnion, RInstance, RArray, optional_value_type, int_rprimitive, float_rprimitive,
3131
bool_rprimitive, list_rprimitive, str_rprimitive, is_none_rprimitive, object_rprimitive,
3232
c_pyssize_t_rprimitive, is_short_int_rprimitive, is_tagged, PyVarObject, short_int_rprimitive,
3333
is_list_rprimitive, is_tuple_rprimitive, is_dict_rprimitive, is_set_rprimitive, PySetObject,
3434
none_rprimitive, RTuple, is_bool_rprimitive, is_str_rprimitive, c_int_rprimitive,
35-
pointer_rprimitive, PyObject, PyListObject, bit_rprimitive, is_bit_rprimitive,
36-
object_pointer_rprimitive, c_size_t_rprimitive, dict_rprimitive
35+
pointer_rprimitive, PyObject, bit_rprimitive, is_bit_rprimitive,
36+
object_pointer_rprimitive, c_size_t_rprimitive, dict_rprimitive, PyListObject
3737
)
3838
from mypyc.ir.func_ir import FuncDecl, FuncSignature
3939
from mypyc.ir.class_ir import ClassIR, all_concrete_classes
@@ -46,7 +46,7 @@
4646
binary_ops, unary_ops, ERR_NEG_INT
4747
)
4848
from mypyc.primitives.list_ops import (
49-
list_extend_op, new_list_op
49+
list_extend_op, new_list_op, list_build_op
5050
)
5151
from mypyc.primitives.tuple_ops import (
5252
list_tuple_op, new_tuple_op, new_tuple_with_length_op
@@ -78,6 +78,12 @@
7878

7979
DictEntry = Tuple[Optional[Value], Value]
8080

81+
# If the number of items is less than the threshold when initializing
82+
# a list, we inline the generated IR using SetMem and an expanded
83+
# for-loop. Otherwise, we would call `list_build_op` for larger lists.
84+
# TODO: The threshold is a randomly chosen number which needs further
85+
# study on real-world projects for a better balance.
86+
LIST_BUILDING_EXPANSION_THRESHOLD = 10
8187

8288
# From CPython
8389
PY_VECTORCALL_ARGUMENTS_OFFSET: Final = 1 << (PLATFORM_SIZE * 8 - 1)
@@ -669,7 +675,6 @@ def native_args_to_positional(self,
669675
# coercing everything to the expected type.
670676
output_args = []
671677
for lst, arg in zip(formal_to_actual, sig.args):
672-
output_arg = None
673678
if arg.kind == ARG_STAR:
674679
assert star_arg
675680
output_arg = star_arg
@@ -700,7 +705,7 @@ def gen_method_call(self,
700705
arg_names: Optional[List[Optional[str]]] = None) -> Value:
701706
"""Generate either a native or Python method call."""
702707
# If we have *args, then fallback to Python method call.
703-
if (arg_kinds is not None and any(kind.is_star() for kind in arg_kinds)):
708+
if arg_kinds is not None and any(kind.is_star() for kind in arg_kinds):
704709
return self.py_method_call(base, name, arg_values, base.line, arg_kinds, arg_names)
705710

706711
# If the base type is one of ours, do a MethodCall
@@ -766,7 +771,7 @@ def none(self) -> Value:
766771

767772
def true(self) -> Value:
768773
"""Load unboxed True value (type: bool_rprimitive)."""
769-
return Integer(1, bool_rprimitive)
774+
return Integer(1, bool_rprimitive)
770775

771776
def false(self) -> Value:
772777
"""Load unboxed False value (type: bool_rprimitive)."""
@@ -1008,7 +1013,7 @@ def compare_tuples(self,
10081013
return result
10091014
length = len(lhs.type.types)
10101015
false_assign, true_assign, out = BasicBlock(), BasicBlock(), BasicBlock()
1011-
check_blocks = [BasicBlock() for i in range(length)]
1016+
check_blocks = [BasicBlock() for _ in range(length)]
10121017
lhs_items = [self.add(TupleGet(lhs, i, line)) for i in range(length)]
10131018
rhs_items = [self.add(TupleGet(rhs, i, line)) for i in range(length)]
10141019

@@ -1137,8 +1142,15 @@ def new_list_op_with_length(self, length: Value, line: int) -> Value:
11371142
return self.call_c(new_list_op, [length], line)
11381143

11391144
def new_list_op(self, values: List[Value], line: int) -> Value:
1140-
length = Integer(len(values), c_pyssize_t_rprimitive, line)
1141-
result_list = self.call_c(new_list_op, [length], line)
1145+
length: List[Value] = [Integer(len(values), c_pyssize_t_rprimitive, line)]
1146+
if len(values) >= LIST_BUILDING_EXPANSION_THRESHOLD:
1147+
return self.call_c(list_build_op, length + values, line)
1148+
1149+
# If the length of the list is less than the threshold,
1150+
# LIST_BUILDING_EXPANSION_THRESHOLD, we directly expand the
1151+
# for-loop and inline the SetMem operation, which is faster
1152+
# than list_build_op, but generates more code.
1153+
result_list = self.call_c(new_list_op, length, line)
11421154
if len(values) == 0:
11431155
return result_list
11441156
args = [self.coerce(item, object_rprimitive, line) for item in values]
@@ -1174,7 +1186,7 @@ def shortcircuit_helper(self, op: str,
11741186
# Having actual Phi nodes would be really nice here!
11751187
target = Register(expr_type)
11761188
# left_body takes the value of the left side, right_body the right
1177-
left_body, right_body, next = BasicBlock(), BasicBlock(), BasicBlock()
1189+
left_body, right_body, next_block = BasicBlock(), BasicBlock(), BasicBlock()
11781190
# true_body is taken if the left is true, false_body if it is false.
11791191
# For 'and' the value is the right side if the left is true, and for 'or'
11801192
# it is the right side if the left is false.
@@ -1187,15 +1199,15 @@ def shortcircuit_helper(self, op: str,
11871199
self.activate_block(left_body)
11881200
left_coerced = self.coerce(left_value, expr_type, line)
11891201
self.add(Assign(target, left_coerced))
1190-
self.goto(next)
1202+
self.goto(next_block)
11911203

11921204
self.activate_block(right_body)
11931205
right_value = right()
11941206
right_coerced = self.coerce(right_value, expr_type, line)
11951207
self.add(Assign(target, right_coerced))
1196-
self.goto(next)
1208+
self.goto(next_block)
11971209

1198-
self.activate_block(next)
1210+
self.activate_block(next_block)
11991211
return target
12001212

12011213
def add_bool_branch(self, value: Value, true: BasicBlock, false: BasicBlock) -> None:

mypyc/lib-rt/CPy.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ PyObject *CPyObject_GetSlice(PyObject *obj, CPyTagged start, CPyTagged end);
318318
// List operations
319319

320320

321+
PyObject *CPyList_Build(Py_ssize_t len, ...);
321322
PyObject *CPyList_GetItem(PyObject *list, CPyTagged index);
322323
PyObject *CPyList_GetItemUnsafe(PyObject *list, CPyTagged index);
323324
PyObject *CPyList_GetItemShort(PyObject *list, CPyTagged index);

mypyc/lib-rt/list_ops.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,26 @@
55
#include <Python.h>
66
#include "CPy.h"
77

8+
PyObject *CPyList_Build(Py_ssize_t len, ...) {
9+
Py_ssize_t i;
10+
11+
PyObject *res = PyList_New(len);
12+
if (res == NULL) {
13+
return NULL;
14+
}
15+
16+
va_list args;
17+
va_start(args, len);
18+
for (i = 0; i < len; i++) {
19+
// Steals the reference
20+
PyObject *value = va_arg(args, PyObject *);
21+
PyList_SET_ITEM(res, i, value);
22+
}
23+
va_end(args);
24+
25+
return res;
26+
}
27+
828
PyObject *CPyList_GetItemUnsafe(PyObject *list, CPyTagged index) {
929
Py_ssize_t n = CPyTagged_ShortAsSsize_t(index);
1030
PyObject *result = PyList_GET_ITEM(list, n);

mypyc/primitives/list_ops.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,22 @@
2222
arg_types=[object_rprimitive],
2323
return_type=list_rprimitive,
2424
c_function_name='PySequence_List',
25-
error_kind=ERR_MAGIC,
26-
)
25+
error_kind=ERR_MAGIC)
2726

2827
new_list_op = custom_op(
2928
arg_types=[c_pyssize_t_rprimitive],
3029
return_type=list_rprimitive,
3130
c_function_name='PyList_New',
3231
error_kind=ERR_MAGIC)
3332

33+
list_build_op = custom_op(
34+
arg_types=[c_pyssize_t_rprimitive],
35+
return_type=list_rprimitive,
36+
c_function_name='CPyList_Build',
37+
error_kind=ERR_MAGIC,
38+
var_arg_type=object_rprimitive,
39+
steals=True)
40+
3441
# list[index] (for an integer index)
3542
list_get_item_op = method_op(
3643
name='__getitem__',

mypyc/test-data/irbuild-lists.test

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,30 @@ L0:
9393
x = r0
9494
return 1
9595

96+
[case testNewListTenItems]
97+
from typing import List
98+
def f() -> None:
99+
x: List[str] = ['a', 'b', 'c', 'd', 'e',
100+
'f', 'g', 'h', 'i', 'j']
101+
[out]
102+
def f():
103+
r0, r1, r2, r3, r4, r5, r6, r7, r8, r9 :: str
104+
r10, x :: list
105+
L0:
106+
r0 = 'a'
107+
r1 = 'b'
108+
r2 = 'c'
109+
r3 = 'd'
110+
r4 = 'e'
111+
r5 = 'f'
112+
r6 = 'g'
113+
r7 = 'h'
114+
r8 = 'i'
115+
r9 = 'j'
116+
r10 = CPyList_Build(10, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9)
117+
x = r10
118+
return 1
119+
96120
[case testListMultiply]
97121
from typing import List
98122
def f(a: List[int]) -> None:

mypyc/test-data/run-lists.test

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,25 @@ print(primes(13))
7575
\[0, 0, 1, 1]
7676
\[0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1]
7777

78+
[case testListBuild]
79+
def test_list_build() -> None:
80+
# Currently LIST_BUILDING_EXPANSION_THRESHOLD is 10
81+
# long list built by list_build_op
82+
l1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
83+
l1.pop()
84+
l1.append(100)
85+
assert l1 == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 100]
86+
# short list built by SetMem
87+
l2 = [1, 2]
88+
l2.append(3)
89+
l2.pop()
90+
l2.pop()
91+
assert l2 == [1]
92+
# empty list
93+
l3 = []
94+
l3.append('a')
95+
assert l3 == ['a']
96+
7897
[case testListPrims]
7998
from typing import List
8099

0 commit comments

Comments
 (0)