-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LLDB] Add array subscription and integer parsing to DIL #138551
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-lldb Author: Ilia Kuklin (kuilpd) ChangesPatch is 27.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138551.diff 13 Files Affected:
diff --git a/lldb/docs/dil-expr-lang.ebnf b/lldb/docs/dil-expr-lang.ebnf
index c8bf4231b3e4a..0cbb5403785db 100644
--- a/lldb/docs/dil-expr-lang.ebnf
+++ b/lldb/docs/dil-expr-lang.ebnf
@@ -6,16 +6,20 @@
expression = unary_expression ;
unary_expression = unary_operator expression
- | primary_expression ;
+ | postfix_expression ;
unary_operator = "*" | "&" ;
-primary_expression = id_expression
+postfix_expression = primary_expression
+ | postfix_expression "[" expression "]";
+
+primary_expression = numeric_literal
+ | id_expression
| "(" expression ")";
id_expression = unqualified_id
| qualified_id
- | register ;
+ | register ;
unqualified_id = identifier ;
@@ -24,6 +28,8 @@ qualified_id = ["::"] [nested_name_specifier] unqualified_id
identifier = ? C99 Identifier ? ;
+numeric_literal = ? C99 Integer constant ? ;
+
register = "$" ? Register name ? ;
nested_name_specifier = type_name "::"
diff --git a/lldb/include/lldb/ValueObject/DILAST.h b/lldb/include/lldb/ValueObject/DILAST.h
index fe3827ef0516a..6908deed7aee3 100644
--- a/lldb/include/lldb/ValueObject/DILAST.h
+++ b/lldb/include/lldb/ValueObject/DILAST.h
@@ -18,8 +18,10 @@ namespace lldb_private::dil {
/// The various types DIL AST nodes (used by the DIL parser).
enum class NodeKind {
+ eArraySubscriptNode,
eErrorNode,
eIdentifierNode,
+ eScalarLiteralNode,
eUnaryOpNode,
};
@@ -71,6 +73,26 @@ class ErrorNode : public ASTNode {
}
};
+class ScalarLiteralNode : public ASTNode {
+public:
+ ScalarLiteralNode(uint32_t location, lldb::BasicType type, Scalar value)
+ : ASTNode(location, NodeKind::eScalarLiteralNode), m_type(type),
+ m_value(value) {}
+
+ llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override;
+
+ lldb::BasicType GetType() const { return m_type; }
+ Scalar GetValue() const & { return m_value; }
+
+ static bool classof(const ASTNode *node) {
+ return node->GetKind() == NodeKind::eScalarLiteralNode;
+ }
+
+private:
+ lldb::BasicType m_type;
+ Scalar m_value;
+};
+
class IdentifierNode : public ASTNode {
public:
IdentifierNode(uint32_t location, std::string name)
@@ -108,6 +130,26 @@ class UnaryOpNode : public ASTNode {
ASTNodeUP m_operand;
};
+class ArraySubscriptNode : public ASTNode {
+public:
+ ArraySubscriptNode(uint32_t location, ASTNodeUP lhs, ASTNodeUP rhs)
+ : ASTNode(location, NodeKind::eArraySubscriptNode), m_lhs(std::move(lhs)),
+ m_rhs(std::move(rhs)) {}
+
+ llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override;
+
+ ASTNode *lhs() const { return m_lhs.get(); }
+ ASTNode *rhs() const { return m_rhs.get(); }
+
+ static bool classof(const ASTNode *node) {
+ return node->GetKind() == NodeKind::eArraySubscriptNode;
+ }
+
+private:
+ ASTNodeUP m_lhs;
+ ASTNodeUP m_rhs;
+};
+
/// This class contains one Visit method for each specialized type of
/// DIL AST node. The Visit methods are used to dispatch a DIL AST node to
/// the correct function in the DIL expression evaluator for evaluating that
@@ -116,9 +158,13 @@ class Visitor {
public:
virtual ~Visitor() = default;
virtual llvm::Expected<lldb::ValueObjectSP>
+ Visit(const ScalarLiteralNode *node) = 0;
+ virtual llvm::Expected<lldb::ValueObjectSP>
Visit(const IdentifierNode *node) = 0;
virtual llvm::Expected<lldb::ValueObjectSP>
Visit(const UnaryOpNode *node) = 0;
+ virtual llvm::Expected<lldb::ValueObjectSP>
+ Visit(const ArraySubscriptNode *node) = 0;
};
} // namespace lldb_private::dil
diff --git a/lldb/include/lldb/ValueObject/DILEval.h b/lldb/include/lldb/ValueObject/DILEval.h
index b1dd3fdb49739..e3df80862b082 100644
--- a/lldb/include/lldb/ValueObject/DILEval.h
+++ b/lldb/include/lldb/ValueObject/DILEval.h
@@ -47,9 +47,15 @@ class Interpreter : Visitor {
llvm::Expected<lldb::ValueObjectSP> Evaluate(const ASTNode *node);
private:
+ llvm::Expected<lldb::ValueObjectSP>
+ Visit(const ScalarLiteralNode *node) override;
llvm::Expected<lldb::ValueObjectSP>
Visit(const IdentifierNode *node) override;
llvm::Expected<lldb::ValueObjectSP> Visit(const UnaryOpNode *node) override;
+ llvm::Expected<lldb::ValueObjectSP>
+ Visit(const ArraySubscriptNode *node) override;
+
+ lldb::ValueObjectSP PointerAdd(lldb::ValueObjectSP lhs, int64_t offset);
// Used by the interpreter to create objects, perform casts, etc.
lldb::TargetSP m_target;
diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h
index 3508b8b7a85c6..0176db73835e9 100644
--- a/lldb/include/lldb/ValueObject/DILLexer.h
+++ b/lldb/include/lldb/ValueObject/DILLexer.h
@@ -29,7 +29,10 @@ class Token {
eof,
identifier,
l_paren,
+ l_square,
+ numeric_constant,
r_paren,
+ r_square,
star,
};
diff --git a/lldb/include/lldb/ValueObject/DILParser.h b/lldb/include/lldb/ValueObject/DILParser.h
index f5c00b1040ef4..af237ece0228d 100644
--- a/lldb/include/lldb/ValueObject/DILParser.h
+++ b/lldb/include/lldb/ValueObject/DILParser.h
@@ -84,12 +84,15 @@ class DILParser {
ASTNodeUP ParseExpression();
ASTNodeUP ParseUnaryExpression();
+ ASTNodeUP ParsePostfixExpression();
ASTNodeUP ParsePrimaryExpression();
std::string ParseNestedNameSpecifier();
std::string ParseIdExpression();
std::string ParseUnqualifiedId();
+ ASTNodeUP ParseNumericLiteral();
+ ASTNodeUP ParseNumericConstant();
void BailOut(const std::string &error, uint32_t loc, uint16_t err_len);
diff --git a/lldb/source/ValueObject/DILAST.cpp b/lldb/source/ValueObject/DILAST.cpp
index ea847587501ee..ceb4a4aa99c4f 100644
--- a/lldb/source/ValueObject/DILAST.cpp
+++ b/lldb/source/ValueObject/DILAST.cpp
@@ -15,6 +15,11 @@ llvm::Expected<lldb::ValueObjectSP> ErrorNode::Accept(Visitor *v) const {
llvm_unreachable("Attempting to Visit a DIL ErrorNode.");
}
+llvm::Expected<lldb::ValueObjectSP>
+ScalarLiteralNode::Accept(Visitor *v) const {
+ return v->Visit(this);
+}
+
llvm::Expected<lldb::ValueObjectSP> IdentifierNode::Accept(Visitor *v) const {
return v->Visit(this);
}
@@ -23,4 +28,9 @@ llvm::Expected<lldb::ValueObjectSP> UnaryOpNode::Accept(Visitor *v) const {
return v->Visit(this);
}
+llvm::Expected<lldb::ValueObjectSP>
+ArraySubscriptNode::Accept(Visitor *v) const {
+ return v->Visit(this);
+}
+
} // namespace lldb_private::dil
diff --git a/lldb/source/ValueObject/DILEval.cpp b/lldb/source/ValueObject/DILEval.cpp
index 15a66d4866305..527017da7c019 100644
--- a/lldb/source/ValueObject/DILEval.cpp
+++ b/lldb/source/ValueObject/DILEval.cpp
@@ -18,6 +18,22 @@
namespace lldb_private::dil {
+static lldb::ValueObjectSP
+ArrayToPointerConversion(lldb::ValueObjectSP valobj,
+ std::shared_ptr<ExecutionContextScope> ctx) {
+ assert(valobj->IsArrayType() &&
+ "an argument to array-to-pointer conversion must be an array");
+
+ uint64_t addr = valobj->GetLoadAddress();
+ llvm::StringRef name = "result";
+ ExecutionContext exe_ctx;
+ ctx->CalculateExecutionContext(exe_ctx);
+ return ValueObject::CreateValueObjectFromAddress(
+ name, addr, exe_ctx,
+ valobj->GetCompilerType().GetArrayElementType(ctx.get()).GetPointerType(),
+ /* do_deref */ false);
+}
+
static lldb::ValueObjectSP LookupStaticIdentifier(
VariableList &variable_list, std::shared_ptr<StackFrame> exe_scope,
llvm::StringRef name_ref, llvm::StringRef unqualified_name) {
@@ -214,6 +230,45 @@ llvm::Expected<lldb::ValueObjectSP> Interpreter::Evaluate(const ASTNode *node) {
return value_or_error;
}
+static CompilerType GetBasicType(std::shared_ptr<ExecutionContextScope> ctx,
+ lldb::BasicType basic_type) {
+ static std::unordered_map<lldb::BasicType, CompilerType> basic_types;
+ auto type = basic_types.find(basic_type);
+ if (type != basic_types.end()) {
+ std::string type_name((type->second).GetTypeName().AsCString());
+ // Only return the found type if it's valid.
+ if (type_name != "<invalid>")
+ return type->second;
+ }
+
+ lldb::TargetSP target_sp = ctx->CalculateTarget();
+ if (target_sp) {
+ for (auto type_system_sp : target_sp->GetScratchTypeSystems())
+ if (auto compiler_type =
+ type_system_sp->GetBasicTypeFromAST(basic_type)) {
+ basic_types.insert({basic_type, compiler_type});
+ return compiler_type;
+ }
+ }
+ CompilerType empty_type;
+ return empty_type;
+}
+
+llvm::Expected<lldb::ValueObjectSP>
+Interpreter::Visit(const ScalarLiteralNode *node) {
+ CompilerType result_type = GetBasicType(m_exe_ctx_scope, node->GetType());
+ Scalar value = node->GetValue();
+
+ if (result_type.IsInteger() || result_type.IsNullPtrType() ||
+ result_type.IsPointerType()) {
+ llvm::APInt val = value.GetAPSInt();
+ return ValueObject::CreateValueObjectFromAPInt(m_target, val, result_type,
+ "result");
+ }
+
+ return lldb::ValueObjectSP();
+}
+
llvm::Expected<lldb::ValueObjectSP>
Interpreter::Visit(const IdentifierNode *node) {
lldb::DynamicValueType use_dynamic = m_default_dynamic;
@@ -272,4 +327,108 @@ Interpreter::Visit(const UnaryOpNode *node) {
m_expr, "invalid ast: unexpected binary operator", node->GetLocation());
}
+lldb::ValueObjectSP Interpreter::PointerAdd(lldb::ValueObjectSP lhs,
+ int64_t offset) {
+ uint64_t byte_size = 0;
+ if (auto temp = lhs->GetCompilerType().GetPointeeType().GetByteSize(
+ lhs->GetTargetSP().get()))
+ byte_size = *temp;
+ uintptr_t addr = lhs->GetValueAsUnsigned(0) + offset * byte_size;
+
+ llvm::StringRef name = "result";
+ ExecutionContext exe_ctx(m_target.get(), false);
+ return ValueObject::CreateValueObjectFromAddress(name, addr, exe_ctx,
+ lhs->GetCompilerType(),
+ /* do_deref */ false);
+}
+
+llvm::Expected<lldb::ValueObjectSP>
+Interpreter::Visit(const ArraySubscriptNode *node) {
+ auto lhs_or_err = Evaluate(node->lhs());
+ if (!lhs_or_err) {
+ return lhs_or_err;
+ }
+ lldb::ValueObjectSP base = *lhs_or_err;
+ auto rhs_or_err = Evaluate(node->rhs());
+ if (!rhs_or_err) {
+ return rhs_or_err;
+ }
+ lldb::ValueObjectSP index = *rhs_or_err;
+
+ Status error;
+ if (base->GetCompilerType().IsReferenceType()) {
+ base = base->Dereference(error);
+ if (error.Fail())
+ return error.ToError();
+ }
+ if (index->GetCompilerType().IsReferenceType()) {
+ index = index->Dereference(error);
+ if (error.Fail())
+ return error.ToError();
+ }
+
+ auto index_type = index->GetCompilerType();
+ if (!index_type.IsIntegerOrUnscopedEnumerationType())
+ return llvm::make_error<DILDiagnosticError>(
+ m_expr, "array subscript is not an integer", node->GetLocation());
+
+ // Check to see if 'base' has a synthetic value; if so, try using that.
+ if (base->HasSyntheticValue()) {
+ lldb::ValueObjectSP synthetic = base->GetSyntheticValue();
+ if (synthetic && synthetic != base) {
+ uint32_t num_children = synthetic->GetNumChildrenIgnoringErrors();
+ // Verify that the 'index' is not out-of-range for the declared type.
+ if (index->GetValueAsSigned(0) >= num_children) {
+ auto message =
+ llvm::formatv("array index {0} is not valid for \"({1}) {2}\"",
+ index->GetValueAsSigned(0),
+ base->GetTypeName().AsCString("<invalid type>"),
+ base->GetName().AsCString());
+ return llvm::make_error<DILDiagnosticError>(m_expr, message,
+ node->GetLocation());
+ }
+
+ uint64_t child_idx = index->GetValueAsUnsigned(0);
+ if (static_cast<uint32_t>(child_idx) <
+ synthetic->GetNumChildrenIgnoringErrors()) {
+ lldb::ValueObjectSP child_valobj_sp =
+ synthetic->GetChildAtIndex(child_idx);
+ if (child_valobj_sp) {
+ return child_valobj_sp;
+ }
+ }
+ }
+ }
+
+ auto base_type = base->GetCompilerType();
+ if (!base_type.IsPointerType() && !base_type.IsArrayType())
+ return llvm::make_error<DILDiagnosticError>(
+ m_expr, "subscripted value is not an array or pointer",
+ node->GetLocation());
+ if (base_type.IsPointerToVoid())
+ return llvm::make_error<DILDiagnosticError>(
+ m_expr, "subscript of pointer to incomplete type 'void'",
+ node->GetLocation());
+
+ if (base_type.IsArrayType())
+ base = ArrayToPointerConversion(base, m_exe_ctx_scope);
+
+ CompilerType item_type = base->GetCompilerType().GetPointeeType();
+ lldb::addr_t base_addr = base->GetValueAsUnsigned(0);
+
+ llvm::StringRef name = "result";
+ ExecutionContext exe_ctx(m_target.get(), false);
+ // Create a pointer and add the index, i.e. "base + index".
+ lldb::ValueObjectSP value =
+ PointerAdd(ValueObject::CreateValueObjectFromAddress(
+ name, base_addr, exe_ctx, item_type.GetPointerType(),
+ /*do_deref=*/false),
+ index->GetValueAsSigned(0));
+
+ lldb::ValueObjectSP val2 = value->Dereference(error);
+ if (error.Fail())
+ return error.ToError();
+ return val2;
+}
+
} // namespace lldb_private::dil
diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp
index b9c2e7971e3b4..3222032feef19 100644
--- a/lldb/source/ValueObject/DILLexer.cpp
+++ b/lldb/source/ValueObject/DILLexer.cpp
@@ -13,6 +13,7 @@
#include "lldb/ValueObject/DILLexer.h"
#include "lldb/Utility/Status.h"
+#include "lldb/ValueObject/DILParser.h"
#include "llvm/ADT/StringSwitch.h"
namespace lldb_private::dil {
@@ -29,8 +30,14 @@ llvm::StringRef Token::GetTokenName(Kind kind) {
return "identifier";
case Kind::l_paren:
return "l_paren";
+ case Kind::l_square:
+ return "l_square";
+ case Kind::numeric_constant:
+ return "numeric_constant";
case Kind::r_paren:
return "r_paren";
+ case Kind::r_square:
+ return "r_square";
case Token::star:
return "star";
}
@@ -57,6 +64,29 @@ static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
return candidate;
}
+static void ConsumeNumberBody(char &prev_ch, llvm::StringRef::iterator &cur_pos,
+ llvm::StringRef expr) {
+ while (cur_pos != expr.end() &&
+ (IsDigit(*cur_pos) || IsLetter(*cur_pos) || *cur_pos == '_')) {
+ prev_ch = *cur_pos;
+ cur_pos++;
+ }
+}
+
+static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr,
+ llvm::StringRef &remainder) {
+ llvm::StringRef::iterator cur_pos = remainder.begin();
+ llvm::StringRef::iterator start = cur_pos;
+ char prev_ch = 0;
+ if (IsDigit(*start)) {
+ ConsumeNumberBody(prev_ch, cur_pos, expr);
+ llvm::StringRef number = expr.substr(start - expr.begin(), cur_pos - start);
+ if (remainder.consume_front(number))
+ return number;
+ }
+ return std::nullopt;
+}
+
llvm::Expected<DILLexer> DILLexer::Create(llvm::StringRef expr) {
std::vector<Token> tokens;
llvm::StringRef remainder = expr;
@@ -81,13 +111,19 @@ llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
return Token(Token::eof, "", (uint32_t)expr.size());
uint32_t position = cur_pos - expr.begin();
+ std::optional<llvm::StringRef> maybe_number = IsNumber(expr, remainder);
+ if (maybe_number) {
+ std::string number = (*maybe_number).str();
+ return Token(Token::numeric_constant, number, position);
+ }
std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder);
if (maybe_word)
return Token(Token::identifier, maybe_word->str(), position);
constexpr std::pair<Token::Kind, const char *> operators[] = {
- {Token::amp, "&"}, {Token::coloncolon, "::"}, {Token::l_paren, "("},
- {Token::r_paren, ")"}, {Token::star, "*"},
+ {Token::amp, "&"}, {Token::coloncolon, "::"}, {Token::l_paren, "("},
+ {Token::l_square, "["}, {Token::r_paren, ")"}, {Token::r_square, "]"},
+ {Token::star, "*"},
};
for (auto [kind, str] : operators) {
if (remainder.consume_front(str))
@@ -95,7 +131,8 @@ llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
}
// Unrecognized character(s) in string; unable to lex it.
- return llvm::createStringError("Unable to lex input string");
+ return llvm::make_error<DILDiagnosticError>(expr, "unrecognized token",
+ position);
}
} // namespace lldb_private::dil
diff --git a/lldb/source/ValueObject/DILParser.cpp b/lldb/source/ValueObject/DILParser.cpp
index 2c78eae8cf6bf..24a6f0c909a0a 100644
--- a/lldb/source/ValueObject/DILParser.cpp
+++ b/lldb/source/ValueObject/DILParser.cpp
@@ -111,7 +111,36 @@ ASTNodeUP DILParser::ParseUnaryExpression() {
llvm_unreachable("invalid token kind");
}
}
- return ParsePrimaryExpression();
+ return ParsePostfixExpression();
+}
+
+// Parse a postfix_expression.
+//
+// postfix_expression:
+// primary_expression
+// postfix_expression "[" expression "]"
+//
+ASTNodeUP DILParser::ParsePostfixExpression() {
+ ASTNodeUP lhs = ParsePrimaryExpression();
+ while (CurToken().Is(Token::l_square)) {
+ uint32_t loc = CurToken().GetLocation();
+ Token token = CurToken();
+ switch (token.GetKind()) {
+ case Token::l_square: {
+ m_dil_lexer.Advance();
+ auto rhs = ParseExpression();
+ Expect(Token::r_square);
+ m_dil_lexer.Advance();
+ lhs = std::make_unique<ArraySubscriptNode>(loc, std::move(lhs),
+ std::move(rhs));
+ break;
+ }
+ default:
+ llvm_unreachable("invalid token");
+ }
+ }
+
+ return lhs;
}
// Parse a primary_expression.
@@ -121,6 +150,8 @@ ASTNodeUP DILParser::ParseUnaryExpression() {
// "(" expression ")"
//
ASTNodeUP DILParser::ParsePrimaryExpression() {
+ if (CurToken().Is(Token::numeric_constant))
+ return ParseNumericLiteral();
if (CurToken().IsOneOf({Token::coloncolon, Token::identifier})) {
// Save the source location for the diagnostics message.
uint32_t loc = CurToken().GetLocation();
@@ -280,6 +311,52 @@ void DILParser::BailOut(const std::string &error, uint32_t loc,
m_dil_lexer.ResetTokenIdx(m_dil_lexer.NumLexedTokens() - 1);
}
+// Parse a numeric_literal.
+//
+// numeric_literal:
+// ? Token::numeric_constant ?
+//
+ASTNodeUP DILParser::ParseNumericLiteral() {
+ Expect(Token::numeric_constant);
+ ASTNodeUP numeric_constant = ParseNumericConstant();
+ if (numeric_constant->GetKind() == NodeKind::eErrorNode) {
+ BailOut(llvm::formatv("Failed to parse token as numeric-constant: {0}",
+ CurToken()),
+ CurToken().GetLocation(), CurToken().GetSpelling().length());
+ return std::make_unique<ErrorNode>();
+ }
+ m_dil_lexer.Advance();
+ return numeric_constant;
+}
+
+static constexpr std::pair<const char *, lldb::BasicType> type_suffixes[] = {
+ {"ull", lldb::eBasicTypeUnsignedLongLong},
+ {"ul", lldb::eBasicTypeUnsignedLong},
+ {"u", lldb::eBasicTypeUnsignedInt},
+ {"ll", lldb::eBasicTypeLongLong},
+ {"l", lldb::eBasicTypeLong},
+};
+
+ASTNodeUP DILParser::ParseNumericConstant() {
+ Token token = CurToken();
+ auto spelling = token.GetSpelling();
+ llvm::StringRef spelling_ref = spelling;
+ lldb::BasicType type = lldb::eBasicTypeInt;
+ for (auto [suffix, t] : type_suffixes) {
+ if (spelling_ref.consume_back_insensitive(suffix)) {
+ type = t;
+ break;
+ }
+ }
+ llvm::APInt raw_value;
+ if (!spelling_ref.getAsInteger(0, raw_value)) {
+ Scalar scalar_value(raw_value);
+ return std::make_unique<ScalarLiteralNode>(token.GetLocation(), type,
+ scalar_value);
+ }
+ return std::make_unique<ErrorNode>();
+}
+
void DILParser::Expect(Token::Kind kind) {
if (CurToken().IsNot(kind)) {
BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()),
diff --git a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/Makefile b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/Makefile
new file mode 100644
index 0000000000000..99998b...
[truncated]
|
One thing I'm not sure about here: will converting array type to a pointer, adding the index and then dereferencing work for Swift? |
I don't have an answer to that, but I do know that it's possible to index pointers in the current implementation. I suggest checking out how it achieves that and seeing if it can be translated to here. |
Ah, I will look into this further, thanks. |
Swift doesn't have pointers really. It does pass some things by value (called structs) and others by reference (called classes), but it doesn't let you know how. So this ambiguity between "pointer to object" and "pointer to contiguous buffer of objects" doesn't come up. |
So what happens if I use |
Swift has an out for dealing with pointers of classes (for instance to help pass objects back and forth between swift & C-based languages). So asking for the pointer type for a class type doesn't get None (which you would expect for a language that doesn't have the notion of pointers.) Instead you get We probably need to ask the language "Are Arrays contiguous buffers of objects for you", and return an error from here if they are not. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Apart from the pointer indexing question, this PR also opens the question of "how should the numbers be represented". Here you represent them as ValueObjects, which means you have to give them types, which means you have to find a type system for them, ...
I'm not sure I agree with how are those steps implemented (in fact, I'm pretty sure I don't agree with at least some of those steps). In order to keep this PR focused, and because a first-class number representation is not necessary for replacing the current "frame var", my suggestion would be to dumb down the implementation of postfix_expression
: Instead of postfix_expression "[" expression "]"
, you could implement something like postfix_expression "[" integer "]"
. Then we can store the RHS of the []
expression directly as a (u)int64 (or AP(S)Int, or whatever), and we can completely sidestep the question its type.
My reason for that is that I believe that after this is implemented (and @cmtice finishes member access), we will be in a position to flip the flag on this and make it the default -- which I think is the real proof of the pudding, and I sort of expect we will need to tweak the implementation until everything settles into place. I think it's better to not introduce additional complexity while we're doing that.
ASTNode *lhs() const { return m_lhs.get(); } | ||
ASTNode *rhs() const { return m_rhs.get(); } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's use the GetFoo convention for this (and also in UnaryOpNode, which I didn't catch at the time)
lldb/source/ValueObject/DILEval.cpp
Outdated
return ValueObject::CreateValueObjectFromAddress( | ||
name, addr, exe_ctx, | ||
valobj->GetCompilerType().GetArrayElementType(ctx.get()).GetPointerType(), | ||
/* do_deref */ false); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
/* do_deref */ false); | |
/*do_deref=*/false); |
lldb/source/ValueObject/DILLexer.cpp
Outdated
static void ConsumeNumberBody(char &prev_ch, llvm::StringRef::iterator &cur_pos, | ||
llvm::StringRef expr) { | ||
while (cur_pos != expr.end() && | ||
(IsDigit(*cur_pos) || IsLetter(*cur_pos) || *cur_pos == '_')) { | ||
prev_ch = *cur_pos; | ||
cur_pos++; | ||
} | ||
} | ||
|
||
static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr, | ||
llvm::StringRef &remainder) { | ||
llvm::StringRef::iterator cur_pos = remainder.begin(); | ||
llvm::StringRef::iterator start = cur_pos; | ||
char prev_ch = 0; | ||
if (IsDigit(*start)) { | ||
ConsumeNumberBody(prev_ch, cur_pos, expr); | ||
llvm::StringRef number = expr.substr(start - expr.begin(), cur_pos - start); | ||
if (remainder.consume_front(number)) | ||
return number; | ||
} | ||
return std::nullopt; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this should be equivalent to what you're doing here.
static void ConsumeNumberBody(char &prev_ch, llvm::StringRef::iterator &cur_pos, | |
llvm::StringRef expr) { | |
while (cur_pos != expr.end() && | |
(IsDigit(*cur_pos) || IsLetter(*cur_pos) || *cur_pos == '_')) { | |
prev_ch = *cur_pos; | |
cur_pos++; | |
} | |
} | |
static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr, | |
llvm::StringRef &remainder) { | |
llvm::StringRef::iterator cur_pos = remainder.begin(); | |
llvm::StringRef::iterator start = cur_pos; | |
char prev_ch = 0; | |
if (IsDigit(*start)) { | |
ConsumeNumberBody(prev_ch, cur_pos, expr); | |
llvm::StringRef number = expr.substr(start - expr.begin(), cur_pos - start); | |
if (remainder.consume_front(number)) | |
return number; | |
} | |
return std::nullopt; | |
} | |
static bool IsNumberBodyChar(char ch) { // Or make it a lambda | |
return IsDigit(ch) || IsLetter(ch) || ch == '_'; | |
} | |
static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr, | |
llvm::StringRef &remainder) { | |
llvm::StringRef::iterator cur_pos = remainder.begin(); | |
llvm::StringRef::iterator start = cur_pos; | |
if (remainder[0]) { | |
llvm::StringRef number = remainder.take_while(IsNumberBodyChar); | |
remainder = remainder.drop_front(number.size()); | |
return number; | |
} | |
return std::nullopt; | |
} |
switch (token.GetKind()) { | ||
case Token::l_square: { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the point? You've already checked the type three lines above
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is for when other postfix expressions get added here, .
and ->
from the other PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay... I guess that makes sense...
lldb/docs/dil-expr-lang.ebnf
Outdated
@@ -24,6 +28,8 @@ qualified_id = ["::"] [nested_name_specifier] unqualified_id | |||
|
|||
identifier = ? C99 Identifier ? ; | |||
|
|||
numeric_literal = ? C99 Integer constant ? ; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the C99 part isn't really true. Maybe we should have a separate section to explain the number format we accept?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, 0b...
and 0o...
formats supported here are missing from C99. How should I explain the format without writing its full grammar as well?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think you need to write the formal grammar. I'd be fine with just some text blurb about the accepted formats. Right now, we don't to anything fancy, so you could just say "decimal, hex, (and octal/binary?) numbers are accepted".
Why is that a bad thing? Can we do math operations later without types? Plus it's a unified interface, node evaluation returns a
I mean... this works and is a basis for future patches, why remove something that we will have to bring back shortly afterwards? After replacing frame var, DIL will just have a little bit of extra capabilities, like using another variable as an index.
We still need to implement bit extraction that current |
I found |
Because I wanted to avoid this getting bogged down in the discussion about the types of numbers. I don't really mind the "extra capability" of indexing an array with a another variable. The part I have problem with is the precedent it sets about the representation of numbers. You're right that we can't do math operations without (implicitly or explicitly) assigning them some type, but that's exactly the part I think needs more discussion. Right now, you're assigning the type based on the first type system you find (which is likely going to be C), but I think that's not a good choice. If you're debugging some swift code, I think you'd be surprised if Using the type system of the currently selected frame might be better, but I'm not convinced that the right choice either. Since you support looking up global variables you can easily end up with a global variable from a different language, and then we have the question of what to do with expressions like Which brings me to another idea which might work (but again, I'm not saying it's the right one), which is to give constants some neutral type -- for example, the one (implicitly) defined by the operations on the My point is: the space of possible options is very big here, but none of this is necessary to make array indexing (with a constant) work. I agree with your "it works" assertion (but I think that a much simpler implementation would also "work"), but not with the "it's a basis for future patches" part. I think that part needs more discussion. I don't think you need to change the evaluation interface for this -- yet. It's true that some of the ideas above would require that, but that's very much in the air. For now, it should be sufficient to make the array index a raw integer (instead of an AST node). Then there's nothing to evaluate, and you can just directly take the number and index with it.
Oh wow, I didn't even know that existed. I guess that means we should implement that as well, but I don't think it changes the equation fundamentally. Right now, the two numbers can be stored as... numbers, and later that could be changed to a recursive |
Okay, I see the problem, I didn't really think that retrieving the type system when debugging Swift code would return C type system. Why is it like this though? Is there a reliable way to get a type system of the main language? I don't have any other suggestion how to implement this, so I guess I'll stick to having an integer within the subscript node until we have a better idea. |
@labath |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm sorry, I put this off because it wasn't clear how to respond immediately -- and then it dropped off my radar.
I have a bunch of comments, but overall, I think this could work.
My main objection is to the use of the APInt class. If you want to use it (which I'm fine with, but I also think a plain (u)int64_t would work for this purpose as well), then I think you should use it properly: using the operations defined on the class itself (e.g. slt
/ult
) -- instead of converting it to an plain integer first chance you get. As it stands now your parser will happily accept an incredibly long integer, but then you'll assert when getZExtValue
cannot convert that to an uint64_t. There's also some confusion about whether the value is signed or unsigned. You can't just switch between getZExtValue
and getSExtValue
and expect the result to make sense (hint: getSExtValue
of "0x80" parsed into an APInt is -128)
lldb/source/ValueObject/DILEval.cpp
Outdated
if (!lhs_or_err) { | ||
return lhs_or_err; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (!lhs_or_err) { | |
return lhs_or_err; | |
} | |
if (!lhs_or_err) | |
return lhs_or_err; |
lldb/source/ValueObject/DILEval.cpp
Outdated
if (base->HasSyntheticValue()) { | ||
lldb::ValueObjectSP synthetic = base->GetSyntheticValue(); | ||
if (synthetic && synthetic != base) { | ||
uint32_t num_children = synthetic->GetNumChildrenIgnoringErrors(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GetNumChildren can actually be an expensive operation (see #139805 (comment)). Just ask for the child with the given index and then deal with the error.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I removed the other 2 of these calls, but this one specifically I couldn't: GetChildAtIndex
for a synthetic value calls ValueObjectSynthetic::GetChildAtIndex
. That one with can_create = true
creates a value regardless of the index and always returns something, like a 0 value for a out of bounds subscript of a vector; and with can_create = false
it doesn't create anything, even if index is within bounds and always returns an error.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's surprising. It's not the behavior I see for e.g. std::vector (I have checked that this call actually reaches ValueObjectSynthetic::GetChildAtIndex
):
(lldb) script lldb.frame.FindVariable("v").GetChildAtIndex(46)
No value
Is it possible that some (simple) data formatter implements GetChildAtIndex as return foo
without checking whether the index argument is in range?
If that's the case, I might actually argue that we should just let that happen, but seeing that the current frame var has this check as well, I can be convinced to keep it. Nonetheless, I'd like to change this call to GetNumChildren(/*max=*/index+1)
, for two reasons:
- if we cannot actually get the number of children, the error is probably going to be more interesting than "index out of bounds", so it'd be better to return that.
- the
max
argument avoids computing the precise number of children when all you want to know is whether it has "at least N" children
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible that some (simple) data formatter implements GetChildAtIndex as return foo without checking whether the index argument is in range?
One thing I can add that I noticed: that code worked when I tried it from console lldb, but didn't work on the same executable and input string when called from Python tests. I'm not sure why.
I changed the call to GetNumChildren
, I think I'd prefer a vector telling me that I'm out of bounds instead of returning 0 like it's a correct value from the vector.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I'd prefer a vector telling me that I'm out of bounds instead of returning 0 like it's a correct value from the vector.
I mean, I definitely agree that should be the high level functionality. My questions was more of like "whose responsibility is to make sure this happens?"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One thing I can add that I noticed: that code worked when I tried it from console lldb, but didn't work on the same executable and input string when called from Python tests.
I didn't realize that the console executable got linked to GNU libstdc++ and subsequently used its data formatters. So, I'm guessing GNU formatter doesn't let you get the child with index out of bounds, but LLDB formatter does. Technically, in runtime doing vector[index_out_of_bounds]
returns 0, so neither is incorrect.
I mean, I definitely agree that should be the high level functionality. My questions was more of like "whose responsibility is to make sure this happens?"
I guess if we want a specific behavior for our system, we need to check the index ourselves and not rely on the formatter. Maybe there are other uses of the formatter where out of bounds is expected to return 0.
lldb/source/ValueObject/DILEval.cpp
Outdated
if (base->HasSyntheticValue()) { | ||
lldb::ValueObjectSP synthetic = base->GetSyntheticValue(); | ||
if (synthetic && synthetic != base) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we simplify this? AFAICT, GetSyntheticValue should return null if there's no synthtic value, and it should never the same object, so could this be just if (lldb::ValueObjectSP synthetic = base->GetSyntheticValue())
?
lldb/source/ValueObject/DILLexer.cpp
Outdated
if (maybe_number) { | ||
std::string number = (*maybe_number).str(); | ||
return Token(Token::numeric_constant, number, position); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (maybe_number) { | |
std::string number = (*maybe_number).str(); | |
return Token(Token::numeric_constant, number, position); | |
} | |
if (maybe_number) | |
return Token(Token::numeric_constant, maybe_number->str(), position); |
mainly because it avoids making a copy of the std::string object (without resorting to std::move)
lldb/source/ValueObject/DILEval.cpp
Outdated
if (base->GetCompilerType().IsReferenceType()) { | ||
base = base->Dereference(error); | ||
if (error.Fail()) | ||
return error.ToError(); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the purpose of that ? GetChildAtIndex, GetChildMemberWithName, etc. work fine on reference types.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was for type checks later, I forgot that there is a GetNonReferenceType()
.
lldb/source/ValueObject/DILEval.cpp
Outdated
uint32_t num_children = synthetic->GetNumChildrenIgnoringErrors(); | ||
// Verify that the 'index' is not out-of-range for the declared type. | ||
if (child_idx >= num_children) { | ||
auto message = llvm::formatv( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure this is actually correct, or expected. The type of message here is going to be formatv_object<goo>
, and I wouldn't be surprised if it holds references to objects to objects that get destroyed at the full statement. Better explicitly make this a std::string.
lldb/source/ValueObject/DILEval.cpp
Outdated
auto base_type = base->GetCompilerType(); | ||
if (!base_type.IsPointerType() && !base_type.IsArrayType()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is one of the (few) type checks that I think make sense, but I think this would read better if it was structured like:
if (pointer) ...
else if (array) ...
else /*not pointer or array*/
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried that, but because I need to return base->GetSyntheticArrayMember
in any case other than array, it didn't end up looking better.
switch (token.GetKind()) { | ||
case Token::l_square: { | ||
m_dil_lexer.Advance(); | ||
auto rhs = ParseIntegerConstant(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
make type explicit
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Out of curiosity, why?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LLVM generally has a very cautious stance towards auto: https://llvm.org/docs/CodingStandards.html#use-auto-type-deduction-to-make-code-more-readable
In this case, the type of rhs is not obvious from the immediate context, and for the correctness of the code, it is important to know whether the function returns int
, optional<int>
, Expected<int>
or something else.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see, thank you!
switch (token.GetKind()) { | ||
case Token::l_square: { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay... I guess that makes sense...
llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override; | ||
|
||
ASTNode *GetBase() const { return m_base.get(); } | ||
const llvm::APInt *GetIndex() const { return &m_index; } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this should return a (const) reference to make it clear it can't be null.
(I know the same can be said about the GetBase, and I'd actually do that there as well, but it's less of a surprise there since the object is already stored as a pointer)
It depends on how you retrieve the type system. You were iterating through the type systems of the target, which will return all of them because C and swift (and other languages) can coexist within the same process. There isn't such a thing as a "pure swift" application, so we should have a story for how the two will interact. Maybe story can be "we don't let the two interact", but it should be an explicit choice and we should thing through the consequences.
I don't think "main language" makes sense as a concept. The fact that Swift makes up 99% of your binary is not going to help you if what you really want to do is debug that 1% written in C. It might make sense to use the language of the currently selected frame but even there we need to be careful. The fact that you can access global variables means you can end up with non-local types even in this case.
I don't know why is that the case, but it wouldn't surprise me if that's somehow related to the fact that the BasicType enum is already very C specific. |
I think I addressed everything I could, please take a look again. |
… index from arrays
e88379e
to
3f295ce
Compare
lldb/source/ValueObject/DILEval.cpp
Outdated
if (!num_children) | ||
return llvm::make_error<DILDiagnosticError>( | ||
m_expr, toString(num_children.takeError()), node->GetLocation()); | ||
// Verify that the 'index' is not out-of-range for the declared type. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
technically, this has nothing to do with the type. A formatter can (and we have formatters that do that) return different numbers of children for different values of the same type.
I think you can just delete the comment as it's kinda obvious..
…to DIL" (#141059) Reverts llvm/llvm-project#138551
Reapply #138551 with an xfailed test on Windows
One of the tests added by this was flaky on Linaro's Windows on Arm bot, I've skipped it for now and #141738 will fix it properly. |
Reapply llvm#138551 with an xfailed test on Windows
No description provided.