Skip to content

Add pointers deduced types #196

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions regression-tests/pure2-deducing-pointers.cpp2
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
fun: (inout i:int) -> *int = {
return i&;
}

fun2: (inout i:int) -> (result : *int) = {
result = i&;
}

main: (argc : int, argv : **char) -> int = {
a: int = 2;
pa: *int = a&;
ppa: **int = pa&;

pa = 0; // caught

pa2:= ppa*;
pa2 = 0; // caught

pa3 := a&;
pa3 = 0; // caught
pa3 += 2; // caught

ppa2 := pa2&;
pa4 := ppa2*;
pa4 = 0; // caught

pppa := ppa&;
pa5 := pppa**;
pa5 = 0; // caught

fun(a)++; // caught
fp := fun(a);
fp = 0; // caught

f := fun(a)*;

fp2 := fun2(a).result;
fp2--; // not caught :(

return a * pa* * ppa**; // 8
}
12 changes: 12 additions & 0 deletions regression-tests/test-results/pure2-deducing-pointers.cpp2.output
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
pure2-deducing-pointers.cpp2...
pure2-deducing-pointers.cpp2(14,8): error: = - pointer assignment from null or integer is illegal
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for including a new regression test.

Minor naming thing that I can clean up post-merge: Tests that are intended to fail cppfront compilation (not generate a .cpp file) should have a name that ends in ...error.cpp2. That way my super-simple run-tests.bat test runner will count it as not intending to to create a .cpp and not flag a mismatch error. I'll take care of this post-merge though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right... will correct that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I see that you have merged that. Should I prepare another PR with the name change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By the way. Thank you for merging that. Appreciate it!

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, I did in another commit. All's well, thanks!

pure2-deducing-pointers.cpp2(17,9): error: = - pointer assignment from null or integer is illegal
pure2-deducing-pointers.cpp2(20,9): error: = - pointer assignment from null or integer is illegal
pure2-deducing-pointers.cpp2(21,9): error: += - pointer assignment from null or integer is illegal
pure2-deducing-pointers.cpp2(25,9): error: = - pointer assignment from null or integer is illegal
pure2-deducing-pointers.cpp2(29,9): error: = - pointer assignment from null or integer is illegal
pure2-deducing-pointers.cpp2(31,11): error: ++ - pointer arithmetic is illegal - use std::span or gsl::span instead
pure2-deducing-pointers.cpp2(33,8): error: = - pointer assignment from null or integer is illegal
==> program violates lifetime safety guarantee - see previous errors
==> program violates bounds safety guarantee - see previous errors

140 changes: 113 additions & 27 deletions source/cppfront.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1211,7 +1211,7 @@ class cppfront

in_definite_init = is_definite_initialization(n.identifier);
if (!in_definite_init && !in_parameter_list) {
if (auto decl = sema.get_local_declaration_of(*n.identifier);
if (auto decl = sema.get_declaration_of(*n.identifier);
is_local_name &&
decl &&
// note pointer equality: if we're not in the actual declaration of n.identifier
Expand Down Expand Up @@ -1735,6 +1735,86 @@ class cppfront
}
}

// Don't work yet, TODO: finalize deducing pointer types from parameter lists
auto is_pointer_declaration(parameter_declaration_list_node const* decl_node, int deref_cnt, int addr_cnt) -> bool {
return false;
}

auto is_pointer_declaration(declaration_node const* decl_node, int deref_cnt, int addr_cnt) -> bool {
if (!decl_node) {
return false;
}
if (addr_cnt > deref_cnt) {
return true;
}

return std::visit([&](auto const& type){
return is_pointer_declaration(type.get(), deref_cnt, addr_cnt);
}, decl_node->type);
}

auto is_pointer_declaration(function_type_node const* fun_node, int deref_cnt, int addr_cnt) -> bool {
if (!fun_node) {
return false;
}
if (addr_cnt > deref_cnt) {
return true;
}

return std::visit([&]<typename T>(T const& type){
if constexpr (std::is_same_v<T, std::monostate>) {
return false;
} else {
return is_pointer_declaration(type.get(), deref_cnt, addr_cnt);
}
}, fun_node->returns);
}

auto is_pointer_declaration(type_id_node const* type_node, int deref_cnt, int addr_cnt) -> bool {
if (!type_node) {
return false;
}
if (addr_cnt > deref_cnt) {
return true;
}

if ( type_node->dereference_of ) {
return is_pointer_declaration(type_node->dereference_of, deref_cnt + type_node->dereference_cnt, addr_cnt);
} else if ( type_node->address_of ) {
return is_pointer_declaration(type_node->address_of, deref_cnt, addr_cnt + 1);
}

int pointer_declarators_cnt = std::count_if(std::cbegin(type_node->pc_qualifiers), std::cend(type_node->pc_qualifiers), [](auto* q) {
return q->type() == lexeme::Multiply;
});

if (pointer_declarators_cnt == 0 && type_node->suspicious_initialization) {
return is_pointer_declaration(type_node->suspicious_initialization, deref_cnt, addr_cnt);
}

return (pointer_declarators_cnt + addr_cnt - deref_cnt) > 0;
}

auto is_pointer_declaration(declaration_sym const* decl, int deref_cnt, int addr_cnt) -> bool {
if (!decl) {
return false;
}
if (addr_cnt > deref_cnt) {
return true;
}
return is_pointer_declaration(decl->declaration, deref_cnt, addr_cnt);
}

auto is_pointer_declaration(token const* t, int deref_cnt = 0, int addr_cnt = 0) -> bool {
if (!t) {
return false;
}
if (addr_cnt > deref_cnt) {
return true;
}
auto decl = sema.get_declaration_of(*t, true);
return is_pointer_declaration(decl, deref_cnt, addr_cnt);
}

//-----------------------------------------------------------------------
//
Expand All @@ -1754,7 +1834,7 @@ class cppfront
assert (n.expr->get_token());
assert (!current_args.back().ptoken);
current_args.back().ptoken = n.expr->get_token();
auto decl = sema.get_local_declaration_of(*current_args.back().ptoken);
auto decl = sema.get_declaration_of(*current_args.back().ptoken);
if (!(decl && decl->parameter && decl->parameter->pass == passing_style::forward)) {
errors.emplace_back(
n.position(),
Expand All @@ -1773,32 +1853,38 @@ class cppfront
{
auto& unqual = std::get<id_expression_node::unqualified>(id->id);
assert(unqual);
auto decl = sema.get_local_declaration_of(*unqual->identifier);
// TODO: Generalize this -- for now we detect only cases of the form "p: *int = ...;"
// We don't recognize pointer types that are deduced, multi-level, or from Cpp1
if (decl) {
if (auto t = std::get_if<declaration_node::object>(&decl->declaration->type); t && (*t)->is_pointer_qualified()) {
if (n.ops.empty()) {
last_postfix_expr_was_pointer = true;
}
else
{
if (n.ops.front().op->type() == lexeme::PlusPlus ||
n.ops.front().op->type() == lexeme::MinusMinus ||
n.ops.front().op->type() == lexeme::LeftBracket
) {
errors.emplace_back(
n.ops.front().op->position(),
n.ops.front().op->to_string(true) + " - pointer arithmetic is illegal - use std::span or gsl::span instead"
);
violates_bounds_safety = true;
}
else if (n.ops.front().op->type() == lexeme::Tilde) {
errors.emplace_back(
n.ops.front().op->position(),
n.ops.front().op->to_string(true) + " - pointer bitwise manipulation is illegal - use std::bit_cast to convert to raw bytes first"
);
// TODO: Generalize this:
// - we don't recognize pointer types from Cpp1
// - we don't deduce pointer types from parameter_declaration_list_node
if ( is_pointer_declaration(unqual->identifier) ) {
if (n.ops.empty()) {
last_postfix_expr_was_pointer = true;
}
else
{
auto op = [&](){
if (n.ops.size() >= 2 && n.ops[0].op->type() == lexeme::LeftParen) {
return n.ops[1].op;
} else {
return n.ops.front().op;
}
}();

if (op->type() == lexeme::PlusPlus ||
op->type() == lexeme::MinusMinus ||
op->type() == lexeme::LeftBracket
) {
errors.emplace_back(
op->position(),
op->to_string(true) + " - pointer arithmetic is illegal - use std::span or gsl::span instead"
);
violates_bounds_safety = true;
}
else if (op->type() == lexeme::Tilde) {
errors.emplace_back(
op->position(),
op->to_string(true) + " - pointer bitwise manipulation is illegal - use std::bit_cast to convert to raw bytes first"
);
Copy link
Owner

@hsutter hsutter Jan 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't had the cycles to really grok this, and the main reason I'm reading through the PR is because (a) the test case at top is clear and (b) the code change is mostly localized here, both of which are strengths (thanks!). That also limits the downside because it should be very local.

I'm not yet sure whether I should merge this... In the long term I aim to implement my C++ Core Guidelines Lifetime analysis, and that will likely replace at least parts of this work. But if we do merge this now, I still view it as an interim '[expanded] initial partial implementation' and don't think we would want to go further in this direction in the near term. First we would want to implement the Lifetime analysis and then see where we are, and the Lifetime implementation itself needs to be farther in the future after classes and metaclass functions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main change here is about calling a new function is_pointer_declaration(unqual->identifier) and to check not only the operator that is called on the node but also the results of the function call (e.g. n()++)

}
}
}
Expand Down
27 changes: 27 additions & 0 deletions source/parse.h
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,10 @@ struct type_id_node
source_position pos;

std::vector<token const*> pc_qualifiers;
token const* address_of = {};
token const* dereference_of = {};
int dereference_cnt = {};
token const* suspicious_initialization = {};

enum active { empty=0, qualified, unqualified, keyword };
std::variant<
Expand Down Expand Up @@ -3310,6 +3314,29 @@ class parser
}
}

// deduced_type == true means that the type will be deduced,
// represented using an empty type-id
if (deduced_type) {
auto& type = std::get<declaration_node::object>(n->type);
// object initialized by the address of the curr() object
if (peek(1)->type() == lexeme::Ampersand) {
type->address_of = &curr();
}
// object initialized by (potentially multiple) dereference of the curr() object
else if (peek(1)->type() == lexeme::Multiply) {
type->dereference_of = &curr();
for (int i = 1; peek(i)->type() == lexeme::Multiply; ++i)
type->dereference_cnt += 1;
}
else if (
// object initialized by the result of the function call (and it is not unnamed function)
(peek(1)->type() == lexeme::LeftParen && curr().type() != lexeme::Colon)
|| curr().type() == lexeme::Identifier // or by the object (variable that the type need to be checked)
) {
type->suspicious_initialization = &curr();
}
}

if (!(n->initializer = statement(semicolon_required, n->equal_sign))) {
error("ill-formed initializer");
next();
Expand Down
11 changes: 6 additions & 5 deletions source/sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,9 @@ class sema
{
}

// Get the declaration of t within the same named function
// Get the declaration of t within the same named function or beyound it
//
auto get_local_declaration_of(token const& t) -> declaration_sym const*
auto get_declaration_of(token const& t, bool look_beyond_current_function = false) -> declaration_sym const*
{
// First find the position the query is coming from
// and remember its depth
Expand All @@ -252,11 +252,12 @@ class sema
{
auto const& decl = std::get<symbol::active::declaration>(ri->sym);

// Don't look beyond the start of the current named (has identifier) function
// Conditionally look beyond the start of the current named (has identifier) function
// (an unnamed function is ok to look beyond)
assert(decl.declaration);
if (decl.declaration->type.index() == declaration_node::function &&
decl.declaration->identifier)
decl.declaration->identifier &&
!look_beyond_current_function)
{
return nullptr;
}
Expand Down Expand Up @@ -883,7 +884,7 @@ class sema
{
// Put this into the table if it's a use of an object in scope
// or it's a 'copy' parameter
if (auto decl = get_local_declaration_of(t);
if (auto decl = get_declaration_of(t);
decl
)
{
Expand Down