Skip to content

Commit 4ffe564

Browse files
authored
Add pointers deduced types (#196)
* Add deducing pointer types. Pointers are now deduced after multiple dereferences mixed with address-of operators. If the type is defined in cpp2, it should be deduced. Pointer types are also deduced when returned from functions. Deducing doesn't work yet for parameter_declaration_list_node. This change renames `sema::get_local_declaration_of` to `sema::get_declaration_of`, which takes an additional parameter that makes it work locally or globally.
* Add tests for deducing pointers. The current limitation is that, when deducing pointers from functions, the order of the functions matters - the functions being deduced from need to appear before the part of the code where the deduction happens.
* Add comments to the code.
1 parent 06730e3 commit 4ffe564

File tree

5 files changed

+199
-32
lines changed

5 files changed

+199
-32
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
fun: (inout i:int) -> *int = {
2+
return i&;
3+
}
4+
5+
fun2: (inout i:int) -> (result : *int) = {
6+
result = i&;
7+
}
8+
9+
main: (argc : int, argv : **char) -> int = {
10+
a: int = 2;
11+
pa: *int = a&;
12+
ppa: **int = pa&;
13+
14+
pa = 0; // caught
15+
16+
pa2:= ppa*;
17+
pa2 = 0; // caught
18+
19+
pa3 := a&;
20+
pa3 = 0; // caught
21+
pa3 += 2; // caught
22+
23+
ppa2 := pa2&;
24+
pa4 := ppa2*;
25+
pa4 = 0; // caught
26+
27+
pppa := ppa&;
28+
pa5 := pppa**;
29+
pa5 = 0; // caught
30+
31+
fun(a)++; // caught
32+
fp := fun(a);
33+
fp = 0; // caught
34+
35+
f := fun(a)*;
36+
37+
fp2 := fun2(a).result;
38+
fp2--; // not caught :(
39+
40+
return a * pa* * ppa**; // 8
41+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
pure2-deducing-pointers.cpp2...
2+
pure2-deducing-pointers.cpp2(14,8): error: = - pointer assignment from null or integer is illegal
3+
pure2-deducing-pointers.cpp2(17,9): error: = - pointer assignment from null or integer is illegal
4+
pure2-deducing-pointers.cpp2(20,9): error: = - pointer assignment from null or integer is illegal
5+
pure2-deducing-pointers.cpp2(21,9): error: += - pointer assignment from null or integer is illegal
6+
pure2-deducing-pointers.cpp2(25,9): error: = - pointer assignment from null or integer is illegal
7+
pure2-deducing-pointers.cpp2(29,9): error: = - pointer assignment from null or integer is illegal
8+
pure2-deducing-pointers.cpp2(31,11): error: ++ - pointer arithmetic is illegal - use std::span or gsl::span instead
9+
pure2-deducing-pointers.cpp2(33,8): error: = - pointer assignment from null or integer is illegal
10+
==> program violates lifetime safety guarantee - see previous errors
11+
==> program violates bounds safety guarantee - see previous errors
12+

source/cppfront.cpp

Lines changed: 113 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1082,7 +1082,7 @@ class cppfront
10821082

10831083
in_definite_init = is_definite_initialization(n.identifier);
10841084
if (!in_definite_init && !in_parameter_list) {
1085-
if (auto decl = sema.get_local_declaration_of(*n.identifier);
1085+
if (auto decl = sema.get_declaration_of(*n.identifier);
10861086
is_local_name &&
10871087
decl &&
10881088
// note pointer equality: if we're not in the actual declaration of n.identifier
@@ -1606,6 +1606,86 @@ class cppfront
16061606
}
16071607
}
16081608

1609+
// Doesn't work yet. TODO: finalize deducing pointer types from parameter lists
1610+
auto is_pointer_declaration(parameter_declaration_list_node const* decl_node, int deref_cnt, int addr_cnt) -> bool {
1611+
return false;
1612+
}
1613+
1614+
auto is_pointer_declaration(declaration_node const* decl_node, int deref_cnt, int addr_cnt) -> bool {
1615+
if (!decl_node) {
1616+
return false;
1617+
}
1618+
if (addr_cnt > deref_cnt) {
1619+
return true;
1620+
}
1621+
1622+
return std::visit([&](auto const& type){
1623+
return is_pointer_declaration(type.get(), deref_cnt, addr_cnt);
1624+
}, decl_node->type);
1625+
}
1626+
1627+
auto is_pointer_declaration(function_type_node const* fun_node, int deref_cnt, int addr_cnt) -> bool {
1628+
if (!fun_node) {
1629+
return false;
1630+
}
1631+
if (addr_cnt > deref_cnt) {
1632+
return true;
1633+
}
1634+
1635+
return std::visit([&]<typename T>(T const& type){
1636+
if constexpr (std::is_same_v<T, std::monostate>) {
1637+
return false;
1638+
} else {
1639+
return is_pointer_declaration(type.get(), deref_cnt, addr_cnt);
1640+
}
1641+
}, fun_node->returns);
1642+
}
1643+
1644+
auto is_pointer_declaration(type_id_node const* type_node, int deref_cnt, int addr_cnt) -> bool {
1645+
if (!type_node) {
1646+
return false;
1647+
}
1648+
if (addr_cnt > deref_cnt) {
1649+
return true;
1650+
}
1651+
1652+
if ( type_node->dereference_of ) {
1653+
return is_pointer_declaration(type_node->dereference_of, deref_cnt + type_node->dereference_cnt, addr_cnt);
1654+
} else if ( type_node->address_of ) {
1655+
return is_pointer_declaration(type_node->address_of, deref_cnt, addr_cnt + 1);
1656+
}
1657+
1658+
int pointer_declarators_cnt = std::count_if(std::cbegin(type_node->pc_qualifiers), std::cend(type_node->pc_qualifiers), [](auto* q) {
1659+
return q->type() == lexeme::Multiply;
1660+
});
1661+
1662+
if (pointer_declarators_cnt == 0 && type_node->suspicious_initialization) {
1663+
return is_pointer_declaration(type_node->suspicious_initialization, deref_cnt, addr_cnt);
1664+
}
1665+
1666+
return (pointer_declarators_cnt + addr_cnt - deref_cnt) > 0;
1667+
}
1668+
1669+
auto is_pointer_declaration(declaration_sym const* decl, int deref_cnt, int addr_cnt) -> bool {
1670+
if (!decl) {
1671+
return false;
1672+
}
1673+
if (addr_cnt > deref_cnt) {
1674+
return true;
1675+
}
1676+
return is_pointer_declaration(decl->declaration, deref_cnt, addr_cnt);
1677+
}
1678+
1679+
auto is_pointer_declaration(token const* t, int deref_cnt = 0, int addr_cnt = 0) -> bool {
1680+
if (!t) {
1681+
return false;
1682+
}
1683+
if (addr_cnt > deref_cnt) {
1684+
return true;
1685+
}
1686+
auto decl = sema.get_declaration_of(*t, true);
1687+
return is_pointer_declaration(decl, deref_cnt, addr_cnt);
1688+
}
16091689

16101690
//-----------------------------------------------------------------------
16111691
//
@@ -1625,7 +1705,7 @@ class cppfront
16251705
assert (n.expr->get_token());
16261706
assert (!current_args.back().ptoken);
16271707
current_args.back().ptoken = n.expr->get_token();
1628-
auto decl = sema.get_local_declaration_of(*current_args.back().ptoken);
1708+
auto decl = sema.get_declaration_of(*current_args.back().ptoken);
16291709
if (!(decl && decl->parameter && decl->parameter->pass == passing_style::forward)) {
16301710
errors.emplace_back(
16311711
n.position(),
@@ -1644,32 +1724,38 @@ class cppfront
16441724
{
16451725
auto& unqual = std::get<id_expression_node::unqualified>(id->id);
16461726
assert(unqual);
1647-
auto decl = sema.get_local_declaration_of(*unqual->identifier);
1648-
// TODO: Generalize this -- for now we detect only cases of the form "p: *int = ...;"
1649-
// We don't recognize pointer types that are deduced, multi-level, or from Cpp1
1650-
if (decl) {
1651-
if (auto t = std::get_if<declaration_node::object>(&decl->declaration->type); t && (*t)->is_pointer_qualified()) {
1652-
if (n.ops.empty()) {
1653-
last_postfix_expr_was_pointer = true;
1654-
}
1655-
else
1656-
{
1657-
if (n.ops.front().op->type() == lexeme::PlusPlus ||
1658-
n.ops.front().op->type() == lexeme::MinusMinus ||
1659-
n.ops.front().op->type() == lexeme::LeftBracket
1660-
) {
1661-
errors.emplace_back(
1662-
n.ops.front().op->position(),
1663-
n.ops.front().op->to_string(true) + " - pointer arithmetic is illegal - use std::span or gsl::span instead"
1664-
);
1665-
violates_bounds_safety = true;
1666-
}
1667-
else if (n.ops.front().op->type() == lexeme::Tilde) {
1668-
errors.emplace_back(
1669-
n.ops.front().op->position(),
1670-
n.ops.front().op->to_string(true) + " - pointer bitwise manipulation is illegal - use std::bit_cast to convert to raw bytes first"
1671-
);
1727+
// TODO: Generalize this:
1728+
// - we don't recognize pointer types from Cpp1
1729+
// - we don't deduce pointer types from parameter_declaration_list_node
1730+
if ( is_pointer_declaration(unqual->identifier) ) {
1731+
if (n.ops.empty()) {
1732+
last_postfix_expr_was_pointer = true;
1733+
}
1734+
else
1735+
{
1736+
auto op = [&](){
1737+
if (n.ops.size() >= 2 && n.ops[0].op->type() == lexeme::LeftParen) {
1738+
return n.ops[1].op;
1739+
} else {
1740+
return n.ops.front().op;
16721741
}
1742+
}();
1743+
1744+
if (op->type() == lexeme::PlusPlus ||
1745+
op->type() == lexeme::MinusMinus ||
1746+
op->type() == lexeme::LeftBracket
1747+
) {
1748+
errors.emplace_back(
1749+
op->position(),
1750+
op->to_string(true) + " - pointer arithmetic is illegal - use std::span or gsl::span instead"
1751+
);
1752+
violates_bounds_safety = true;
1753+
}
1754+
else if (op->type() == lexeme::Tilde) {
1755+
errors.emplace_back(
1756+
op->position(),
1757+
op->to_string(true) + " - pointer bitwise manipulation is illegal - use std::bit_cast to convert to raw bytes first"
1758+
);
16731759
}
16741760
}
16751761
}

source/parse.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,10 @@ struct type_id_node
531531
source_position pos;
532532

533533
std::vector<token const*> pc_qualifiers;
534+
token const* address_of = {};
535+
token const* dereference_of = {};
536+
int dereference_cnt = {};
537+
token const* suspicious_initialization = {};
534538

535539
enum active { empty=0, qualified, unqualified, keyword };
536540
std::variant<
@@ -3310,6 +3314,29 @@ class parser
33103314
}
33113315
}
33123316

3317+
// deduced_type == true means that the type will be deduced,
3318+
// represented using an empty type-id
3319+
if (deduced_type) {
3320+
auto& type = std::get<declaration_node::object>(n->type);
3321+
// object initialized by the address of the curr() object
3322+
if (peek(1)->type() == lexeme::Ampersand) {
3323+
type->address_of = &curr();
3324+
}
3325+
// object initialized by (potentially multiple) dereference of the curr() object
3326+
else if (peek(1)->type() == lexeme::Multiply) {
3327+
type->dereference_of = &curr();
3328+
for (int i = 1; peek(i)->type() == lexeme::Multiply; ++i)
3329+
type->dereference_cnt += 1;
3330+
}
3331+
else if (
3332+
// object initialized by the result of a function call (and it is not an unnamed function)
3333+
(peek(1)->type() == lexeme::LeftParen && curr().type() != lexeme::Colon)
3334+
|| curr().type() == lexeme::Identifier // or by the object (variable that the type need to be checked)
3335+
) {
3336+
type->suspicious_initialization = &curr();
3337+
}
3338+
}
3339+
33133340
if (!(n->initializer = statement(semicolon_required, n->equal_sign))) {
33143341
error("ill-formed initializer");
33153342
next();

source/sema.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -226,9 +226,9 @@ class sema
226226
{
227227
}
228228

229-
// Get the declaration of t within the same named function
229+
// Get the declaration of t within the same named function or beyond it
230230
//
231-
auto get_local_declaration_of(token const& t) -> declaration_sym const*
231+
auto get_declaration_of(token const& t, bool look_beyond_current_function = false) -> declaration_sym const*
232232
{
233233
// First find the position the query is coming from
234234
// and remember its depth
@@ -252,11 +252,12 @@ class sema
252252
{
253253
auto const& decl = std::get<symbol::active::declaration>(ri->sym);
254254

255-
// Don't look beyond the start of the current named (has identifier) function
255+
// Conditionally look beyond the start of the current named (has identifier) function
256256
// (an unnamed function is ok to look beyond)
257257
assert(decl.declaration);
258258
if (decl.declaration->type.index() == declaration_node::function &&
259-
decl.declaration->identifier)
259+
decl.declaration->identifier &&
260+
!look_beyond_current_function)
260261
{
261262
return nullptr;
262263
}
@@ -883,7 +884,7 @@ class sema
883884
{
884885
// Put this into the table if it's a use of an object in scope
885886
// or it's a 'copy' parameter
886-
if (auto decl = get_local_declaration_of(t);
887+
if (auto decl = get_declaration_of(t);
887888
decl
888889
)
889890
{

0 commit comments

Comments
 (0)