Skip to content

[mypyc] Build lists using a primitive op #10807

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jul 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 26 additions & 14 deletions mypyc/irbuild/ll_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,16 @@
GetAttr, LoadStatic, MethodCall, CallC, Truncate, LoadLiteral, AssignMulti,
RaiseStandardError, Unreachable, LoadErrorValue,
NAMESPACE_TYPE, NAMESPACE_MODULE, NAMESPACE_STATIC, IntOp, GetElementPtr,
LoadMem, ComparisonOp, LoadAddress, TupleGet, SetMem, KeepAlive, ERR_NEVER, ERR_FALSE
LoadMem, ComparisonOp, LoadAddress, TupleGet, KeepAlive, ERR_NEVER, ERR_FALSE, SetMem
)
from mypyc.ir.rtypes import (
RType, RUnion, RInstance, RArray, optional_value_type, int_rprimitive, float_rprimitive,
bool_rprimitive, list_rprimitive, str_rprimitive, is_none_rprimitive, object_rprimitive,
c_pyssize_t_rprimitive, is_short_int_rprimitive, is_tagged, PyVarObject, short_int_rprimitive,
is_list_rprimitive, is_tuple_rprimitive, is_dict_rprimitive, is_set_rprimitive, PySetObject,
none_rprimitive, RTuple, is_bool_rprimitive, is_str_rprimitive, c_int_rprimitive,
pointer_rprimitive, PyObject, PyListObject, bit_rprimitive, is_bit_rprimitive,
object_pointer_rprimitive, c_size_t_rprimitive, dict_rprimitive
pointer_rprimitive, PyObject, bit_rprimitive, is_bit_rprimitive,
object_pointer_rprimitive, c_size_t_rprimitive, dict_rprimitive, PyListObject
)
from mypyc.ir.func_ir import FuncDecl, FuncSignature
from mypyc.ir.class_ir import ClassIR, all_concrete_classes
Expand All @@ -46,7 +46,7 @@
binary_ops, unary_ops, ERR_NEG_INT
)
from mypyc.primitives.list_ops import (
list_extend_op, new_list_op
list_extend_op, new_list_op, list_build_op
)
from mypyc.primitives.tuple_ops import (
list_tuple_op, new_tuple_op, new_tuple_with_length_op
Expand Down Expand Up @@ -77,6 +77,12 @@

DictEntry = Tuple[Optional[Value], Value]

# If the number of items is less than the threshold when initializing
# a list, we would inline the generate IR using SetMem and expanded
# for-loop. Otherwise, we would call `list_build_op` for larger lists.
# TODO: The threshold is a randomly chosen number which needs further
# study on real-world projects for a better balance.
LIST_BUILDING_EXPANSION_THRESHOLD = 10

# From CPython
PY_VECTORCALL_ARGUMENTS_OFFSET: Final = 1 << (PLATFORM_SIZE * 8 - 1)
Expand Down Expand Up @@ -460,7 +466,6 @@ def native_args_to_positional(self,
# coercing everything to the expected type.
output_args = []
for lst, arg in zip(formal_to_actual, sig.args):
output_arg = None
if arg.kind == ARG_STAR:
items = [args[i] for i in lst]
output_arg = self.new_tuple(items, line)
Expand All @@ -486,7 +491,7 @@ def gen_method_call(self,
arg_names: Optional[List[Optional[str]]] = None) -> Value:
"""Generate either a native or Python method call."""
# If we have *args, then fallback to Python method call.
if (arg_kinds is not None and any(kind.is_star() for kind in arg_kinds)):
if arg_kinds is not None and any(kind.is_star() for kind in arg_kinds):
return self.py_method_call(base, name, arg_values, base.line, arg_kinds, arg_names)

# If the base type is one of ours, do a MethodCall
Expand Down Expand Up @@ -552,7 +557,7 @@ def none(self) -> Value:

def true(self) -> Value:
"""Load unboxed True value (type: bool_rprimitive)."""
return Integer(1, bool_rprimitive)
return Integer(1, bool_rprimitive)

def false(self) -> Value:
"""Load unboxed False value (type: bool_rprimitive)."""
Expand Down Expand Up @@ -798,7 +803,7 @@ def compare_tuples(self,
return result
length = len(lhs.type.types)
false_assign, true_assign, out = BasicBlock(), BasicBlock(), BasicBlock()
check_blocks = [BasicBlock() for i in range(length)]
check_blocks = [BasicBlock() for _ in range(length)]
lhs_items = [self.add(TupleGet(lhs, i, line)) for i in range(length)]
rhs_items = [self.add(TupleGet(rhs, i, line)) for i in range(length)]

Expand Down Expand Up @@ -927,8 +932,15 @@ def new_list_op_with_length(self, length: Value, line: int) -> Value:
return self.call_c(new_list_op, [length], line)

def new_list_op(self, values: List[Value], line: int) -> Value:
length = Integer(len(values), c_pyssize_t_rprimitive, line)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is make_list a good name? since we have another new_list_op_with_length for creating empty list.

Copy link
Collaborator

@TH3CHARLie TH3CHARLie Jul 15, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ummm, I saw we have different namings now like make_dict and new_set_op, I think we should adopt one and stick to it, like make_xxx or new_xxx_op but not both.

result_list = self.call_c(new_list_op, [length], line)
length: List[Value] = [Integer(len(values), c_pyssize_t_rprimitive, line)]
if len(values) >= LIST_BUILDING_EXPANSION_THRESHOLD:
return self.call_c(list_build_op, length + values, line)

# If the length of the list is less than the threshold,
# LIST_BUILDING_EXPANSION_THRESHOLD, we directly expand the
# for-loop and inline the SetMem operation, which is faster
# than list_build_op, however generates more code.
result_list = self.call_c(new_list_op, length, line)
if len(values) == 0:
return result_list
args = [self.coerce(item, object_rprimitive, line) for item in values]
Expand Down Expand Up @@ -964,7 +976,7 @@ def shortcircuit_helper(self, op: str,
# Having actual Phi nodes would be really nice here!
target = Register(expr_type)
# left_body takes the value of the left side, right_body the right
left_body, right_body, next = BasicBlock(), BasicBlock(), BasicBlock()
left_body, right_body, next_block = BasicBlock(), BasicBlock(), BasicBlock()
# true_body is taken if the left is true, false_body if it is false.
# For 'and' the value is the right side if the left is true, and for 'or'
# it is the right side if the left is false.
Expand All @@ -977,15 +989,15 @@ def shortcircuit_helper(self, op: str,
self.activate_block(left_body)
left_coerced = self.coerce(left_value, expr_type, line)
self.add(Assign(target, left_coerced))
self.goto(next)
self.goto(next_block)

self.activate_block(right_body)
right_value = right()
right_coerced = self.coerce(right_value, expr_type, line)
self.add(Assign(target, right_coerced))
self.goto(next)
self.goto(next_block)

self.activate_block(next)
self.activate_block(next_block)
return target

def add_bool_branch(self, value: Value, true: BasicBlock, false: BasicBlock) -> None:
Expand Down
1 change: 1 addition & 0 deletions mypyc/lib-rt/CPy.h
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ PyObject *CPyObject_GetSlice(PyObject *obj, CPyTagged start, CPyTagged end);
// List operations


PyObject *CPyList_Build(Py_ssize_t len, ...);
PyObject *CPyList_GetItem(PyObject *list, CPyTagged index);
PyObject *CPyList_GetItemUnsafe(PyObject *list, CPyTagged index);
PyObject *CPyList_GetItemShort(PyObject *list, CPyTagged index);
Expand Down
20 changes: 20 additions & 0 deletions mypyc/lib-rt/list_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,26 @@
#include <Python.h>
#include "CPy.h"

PyObject *CPyList_Build(Py_ssize_t len, ...) {
Py_ssize_t i;

PyObject *res = PyList_New(len);
if (res == NULL) {
return NULL;
}

va_list args;
va_start(args, len);
for (i = 0; i < len; i++) {
// Steals the reference
PyObject *value = va_arg(args, PyObject *);
PyList_SET_ITEM(res, i, value);
}
va_end(args);

return res;
}

PyObject *CPyList_GetItemUnsafe(PyObject *list, CPyTagged index) {
Py_ssize_t n = CPyTagged_ShortAsSsize_t(index);
PyObject *result = PyList_GET_ITEM(list, n);
Expand Down
11 changes: 9 additions & 2 deletions mypyc/primitives/list_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,22 @@
arg_types=[object_rprimitive],
return_type=list_rprimitive,
c_function_name='PySequence_List',
error_kind=ERR_MAGIC,
)
error_kind=ERR_MAGIC)

new_list_op = custom_op(
arg_types=[c_pyssize_t_rprimitive],
return_type=list_rprimitive,
c_function_name='PyList_New',
error_kind=ERR_MAGIC)

list_build_op = custom_op(
arg_types=[c_pyssize_t_rprimitive],
return_type=list_rprimitive,
c_function_name='CPyList_Build',
error_kind=ERR_MAGIC,
var_arg_type=object_rprimitive,
steals=True)

# list[index] (for an integer index)
list_get_item_op = method_op(
name='__getitem__',
Expand Down
24 changes: 24 additions & 0 deletions mypyc/test-data/irbuild-lists.test
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,30 @@ L0:
x = r0
return 1

[case testNewListTenItems]
from typing import List
def f() -> None:
x: List[str] = ['a', 'b', 'c', 'd', 'e',
'f', 'g', 'h', 'i', 'j']
[out]
def f():
r0, r1, r2, r3, r4, r5, r6, r7, r8, r9 :: str
r10, x :: list
L0:
r0 = 'a'
r1 = 'b'
r2 = 'c'
r3 = 'd'
r4 = 'e'
r5 = 'f'
r6 = 'g'
r7 = 'h'
r8 = 'i'
r9 = 'j'
r10 = CPyList_Build(10, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9)
x = r10
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have IR tests for both short and long list literals?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have dozens of list building cases, whose length is less than 10, in other irbuilding tests. So I think we only need one more test for building a 10 items list.

return 1

[case testListMultiply]
from typing import List
def f(a: List[int]) -> None:
Expand Down
19 changes: 19 additions & 0 deletions mypyc/test-data/run-lists.test
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,25 @@ print(primes(13))
\[0, 0, 1, 1]
\[0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1]

[case testListBuild]
def test_list_build() -> None:
# Currently LIST_BUILDING_EXPANSION_THRESHOLD equals to 10
# long list built by list_build_op
l1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
l1.pop()
l1.append(100)
assert l1 == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 100]
# short list built by Setmem
l2 = [1, 2]
l2.append(3)
l2.pop()
l2.pop()
assert l2 == [1]
# empty list
l3 = []
l3.append('a')
assert l3 == ['a']

[case testListPrims]
from typing import List

Expand Down