Skip to content

Commit fe51d8a

Browse files
authored
[LLDB] Add array subscription and integer parsing to DIL (#141102)
Reapply #138551 with an xfailed test on Windows
1 parent 014f4e9 commit fe51d8a

File tree

13 files changed

+353
-24
lines changed

13 files changed

+353
-24
lines changed

lldb/docs/dil-expr-lang.ebnf

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,17 @@ unary_expression = postfix_expression
1010
1111
unary_operator = "*" | "&" ;
1212
13-
postfix_expresson = primary_expression
14-
| postfix_expression "." id_expression
15-
| postfix_expression "->" id_expression ;
13+
postfix_expression = primary_expression
14+
| postfix_expression "[" integer_literal "]"
15+
| postfix_expression "." id_expression
16+
| postfix_expression "->" id_expression ;
1617
1718
primary_expression = id_expression
18-
| "(" expression ")" ;
19+
| "(" expression ")" ;
1920
2021
id_expression = unqualified_id
2122
| qualified_id
22-
| register ;
23+
| register ;
2324
2425
unqualified_id = identifier ;
2526
@@ -28,6 +29,8 @@ qualified_id = ["::"] [nested_name_specifier] unqualified_id
2829
2930
identifier = ? C99 Identifier ? ;
3031
32+
integer_literal = ? Integer constant: hexadecimal, decimal, octal, binary ? ;
33+
3134
register = "$" ? Register name ? ;
3235
3336
nested_name_specifier = type_name "::"

lldb/include/lldb/ValueObject/DILAST.h

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ namespace lldb_private::dil {
1818

1919
/// The various types of DIL AST nodes (used by the DIL parser).
2020
enum class NodeKind {
21+
eArraySubscriptNode,
2122
eErrorNode,
2223
eIdentifierNode,
2324
eMemberOfNode,
@@ -120,8 +121,8 @@ class UnaryOpNode : public ASTNode {
120121

121122
llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override;
122123

123-
UnaryOpKind kind() const { return m_kind; }
124-
ASTNode *operand() const { return m_operand.get(); }
124+
UnaryOpKind GetKind() const { return m_kind; }
125+
ASTNode *GetOperand() const { return m_operand.get(); }
125126

126127
static bool classof(const ASTNode *node) {
127128
return node->GetKind() == NodeKind::eUnaryOpNode;
@@ -132,6 +133,26 @@ class UnaryOpNode : public ASTNode {
132133
ASTNodeUP m_operand;
133134
};
134135

136+
class ArraySubscriptNode : public ASTNode {
137+
public:
138+
ArraySubscriptNode(uint32_t location, ASTNodeUP base, int64_t index)
139+
: ASTNode(location, NodeKind::eArraySubscriptNode),
140+
m_base(std::move(base)), m_index(index) {}
141+
142+
llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override;
143+
144+
ASTNode *GetBase() const { return m_base.get(); }
145+
int64_t GetIndex() const { return m_index; }
146+
147+
static bool classof(const ASTNode *node) {
148+
return node->GetKind() == NodeKind::eArraySubscriptNode;
149+
}
150+
151+
private:
152+
ASTNodeUP m_base;
153+
int64_t m_index;
154+
};
155+
135156
/// This class contains one Visit method for each specialized type of
136157
/// DIL AST node. The Visit methods are used to dispatch a DIL AST node to
137158
/// the correct function in the DIL expression evaluator for evaluating that
@@ -145,6 +166,8 @@ class Visitor {
145166
Visit(const MemberOfNode *node) = 0;
146167
virtual llvm::Expected<lldb::ValueObjectSP>
147168
Visit(const UnaryOpNode *node) = 0;
169+
virtual llvm::Expected<lldb::ValueObjectSP>
170+
Visit(const ArraySubscriptNode *node) = 0;
148171
};
149172

150173
} // namespace lldb_private::dil

lldb/include/lldb/ValueObject/DILEval.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ class Interpreter : Visitor {
5252
Visit(const IdentifierNode *node) override;
5353
llvm::Expected<lldb::ValueObjectSP> Visit(const MemberOfNode *node) override;
5454
llvm::Expected<lldb::ValueObjectSP> Visit(const UnaryOpNode *node) override;
55+
llvm::Expected<lldb::ValueObjectSP>
56+
Visit(const ArraySubscriptNode *node) override;
5557

5658
// Used by the interpreter to create objects, perform casts, etc.
5759
lldb::TargetSP m_target;

lldb/include/lldb/ValueObject/DILLexer.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,11 @@ class Token {
3030
eof,
3131
identifier,
3232
l_paren,
33+
l_square,
34+
numeric_constant,
3335
period,
3436
r_paren,
37+
r_square,
3538
star,
3639
};
3740

lldb/include/lldb/ValueObject/DILParser.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ class DILParser {
9595

9696
std::string ParseIdExpression();
9797
std::string ParseUnqualifiedId();
98+
std::optional<int64_t> ParseIntegerConstant();
9899

99100
void BailOut(const std::string &error, uint32_t loc, uint16_t err_len);
100101

lldb/source/ValueObject/DILAST.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,9 @@ llvm::Expected<lldb::ValueObjectSP> UnaryOpNode::Accept(Visitor *v) const {
2727
return v->Visit(this);
2828
}
2929

30+
/// Visitor entry point for subscript nodes: forwards this node to the
/// ArraySubscriptNode overload of Visitor::Visit and returns its result.
llvm::Expected<lldb::ValueObjectSP>
ArraySubscriptNode::Accept(Visitor *v) const {
  const ArraySubscriptNode *self = this;
  return v->Visit(self);
}
34+
3035
} // namespace lldb_private::dil

lldb/source/ValueObject/DILEval.cpp

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,13 +240,13 @@ Interpreter::Visit(const IdentifierNode *node) {
240240
llvm::Expected<lldb::ValueObjectSP>
241241
Interpreter::Visit(const UnaryOpNode *node) {
242242
Status error;
243-
auto rhs_or_err = Evaluate(node->operand());
243+
auto rhs_or_err = Evaluate(node->GetOperand());
244244
if (!rhs_or_err)
245245
return rhs_or_err;
246246

247247
lldb::ValueObjectSP rhs = *rhs_or_err;
248248

249-
switch (node->kind()) {
249+
switch (node->GetKind()) {
250250
case UnaryOpKind::Deref: {
251251
lldb::ValueObjectSP dynamic_rhs = rhs->GetDynamicValue(m_use_dynamic);
252252
if (dynamic_rhs)
@@ -383,4 +383,51 @@ Interpreter::Visit(const MemberOfNode *node) {
383383
m_expr, errMsg, node->GetLocation(), node->GetFieldName().size());
384384
}
385385

386+
/// Evaluates `base[index]` for a constant index.
///
/// Resolution order:
///   1. If the base has a synthetic value, the child at the index is taken
///      from it; an index at or past its child count is an error.
///   2. Otherwise the base type (with any reference stripped) must be a
///      pointer or an array, and must not be a pointer to void.
///   3. Arrays are first tried through GetChildAtIndex; pointers, and arrays
///      for which that lookup produced nothing, go through
///      GetSyntheticArrayMember.
///
/// \return The selected child value, or a DILDiagnosticError positioned at
///         the node's location when the subscript cannot be resolved.
llvm::Expected<lldb::ValueObjectSP>
Interpreter::Visit(const ArraySubscriptNode *node) {
  auto lhs_or_err = Evaluate(node->GetBase());
  if (!lhs_or_err)
    return lhs_or_err;
  lldb::ValueObjectSP base = *lhs_or_err;

  // Read the index once. The child-lookup calls below take an unsigned index,
  // while GetSyntheticArrayMember is handed the signed value.
  const int64_t signed_child_idx = node->GetIndex();
  const uint64_t child_idx = static_cast<uint64_t>(signed_child_idx);

  // Shared diagnostic for "this index does not name an element of base".
  auto invalid_index_error = [&]() {
    std::string message = llvm::formatv(
        "array index {0} is not valid for \"({1}) {2}\"", child_idx,
        base->GetTypeName().AsCString("<invalid type>"),
        base->GetName().AsCString());
    return llvm::make_error<DILDiagnosticError>(m_expr, message,
                                                node->GetLocation());
  };

  // Check to see if 'base' has a synthetic value; if so, try using that.
  if (lldb::ValueObjectSP synthetic = base->GetSyntheticValue()) {
    // NOTE(review): the argument looks like an upper bound on how many
    // children need to be counted -- confirm against ValueObject.
    llvm::Expected<uint32_t> num_children =
        synthetic->GetNumChildren(child_idx + 1);
    if (!num_children)
      return llvm::make_error<DILDiagnosticError>(
          m_expr, toString(num_children.takeError()), node->GetLocation());
    if (child_idx >= *num_children)
      return invalid_index_error();
    if (lldb::ValueObjectSP child_valobj_sp =
            synthetic->GetChildAtIndex(child_idx))
      return child_valobj_sp;
    // No child came back: fall through to the plain pointer/array handling.
  }

  auto base_type = base->GetCompilerType().GetNonReferenceType();
  if (!base_type.IsPointerType() && !base_type.IsArrayType())
    return llvm::make_error<DILDiagnosticError>(
        m_expr, "subscripted value is not an array or pointer",
        node->GetLocation());
  if (base_type.IsPointerToVoid())
    return llvm::make_error<DILDiagnosticError>(
        m_expr, "subscript of pointer to incomplete type 'void'",
        node->GetLocation());

  if (base_type.IsArrayType()) {
    if (lldb::ValueObjectSP child_valobj_sp = base->GetChildAtIndex(child_idx))
      return child_valobj_sp;
  }

  // Like the other child lookups in this function, the result is checked
  // before it is returned, so that an empty value object is reported as an
  // error rather than wrapped in a successful Expected.
  if (lldb::ValueObjectSP child_valobj_sp =
          base->GetSyntheticArrayMember(signed_child_idx, true))
    return child_valobj_sp;
  return invalid_index_error();
}
432+
386433
} // namespace lldb_private::dil

lldb/source/ValueObject/DILLexer.cpp

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "lldb/ValueObject/DILLexer.h"
1515
#include "lldb/Utility/Status.h"
16+
#include "lldb/ValueObject/DILParser.h"
1617
#include "llvm/ADT/StringSwitch.h"
1718

1819
namespace lldb_private::dil {
@@ -31,10 +32,16 @@ llvm::StringRef Token::GetTokenName(Kind kind) {
3132
return "identifier";
3233
case Kind::l_paren:
3334
return "l_paren";
35+
case Kind::l_square:
36+
return "l_square";
37+
case Kind::numeric_constant:
38+
return "numeric_constant";
3439
case Kind::period:
3540
return "period";
3641
case Kind::r_paren:
3742
return "r_paren";
43+
case Kind::r_square:
44+
return "r_square";
3845
case Token::star:
3946
return "star";
4047
}
@@ -61,6 +68,18 @@ static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
6168
return candidate;
6269
}
6370

71+
/// Returns true when \p ch may appear inside a numeric token: any digit or
/// any letter.
static bool IsNumberBodyChar(char ch) {
  if (IsDigit(ch))
    return true;
  return IsLetter(ch);
}
72+
73+
/// Tries to split a numeric token off the front of \p remainder.
///
/// A numeric token starts with a digit and extends over every following
/// character accepted by IsNumberBodyChar. On success the token text is
/// returned and \p remainder is advanced past it; otherwise \p remainder is
/// left untouched and std::nullopt is returned.
///
/// \param expr       Not used by this function.
/// \param remainder  Unlexed tail of the input; updated on success.
static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr,
                                               llvm::StringRef &remainder) {
  // Guard against an empty tail before looking at its first character.
  if (remainder.empty() || !IsDigit(remainder.front()))
    return std::nullopt;

  llvm::StringRef number = remainder.take_while(IsNumberBodyChar);
  remainder = remainder.drop_front(number.size());
  return number;
}
82+
6483
llvm::Expected<DILLexer> DILLexer::Create(llvm::StringRef expr) {
6584
std::vector<Token> tokens;
6685
llvm::StringRef remainder = expr;
@@ -85,22 +104,26 @@ llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
85104
return Token(Token::eof, "", (uint32_t)expr.size());
86105

87106
uint32_t position = cur_pos - expr.begin();
107+
std::optional<llvm::StringRef> maybe_number = IsNumber(expr, remainder);
108+
if (maybe_number)
109+
return Token(Token::numeric_constant, maybe_number->str(), position);
88110
std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder);
89111
if (maybe_word)
90112
return Token(Token::identifier, maybe_word->str(), position);
91113

92114
constexpr std::pair<Token::Kind, const char *> operators[] = {
93-
{Token::amp, "&"}, {Token::arrow, "->"}, {Token::coloncolon, "::"},
94-
{Token::l_paren, "("}, {Token::period, "."}, {Token::r_paren, ")"},
95-
{Token::star, "*"},
115+
{Token::amp, "&"}, {Token::arrow, "->"}, {Token::coloncolon, "::"},
116+
{Token::l_paren, "("}, {Token::l_square, "["}, {Token::period, "."},
117+
{Token::r_paren, ")"}, {Token::r_square, "]"}, {Token::star, "*"},
96118
};
97119
for (auto [kind, str] : operators) {
98120
if (remainder.consume_front(str))
99121
return Token(kind, str, position);
100122
}
101123

102124
// Unrecognized character(s) in string; unable to lex it.
103-
return llvm::createStringError("Unable to lex input string");
125+
return llvm::make_error<DILDiagnosticError>(expr, "unrecognized token",
126+
position);
104127
}
105128

106129
} // namespace lldb_private::dil

lldb/source/ValueObject/DILParser.cpp

Lines changed: 50 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -119,20 +119,46 @@ ASTNodeUP DILParser::ParseUnaryExpression() {
119119
//
120120
// postfix_expression:
121121
// primary_expression
122+
// postfix_expression "[" integer_literal "]"
122123
// postfix_expression "." id_expression
123124
// postfix_expression "->" id_expression
124125
//
125126
ASTNodeUP DILParser::ParsePostfixExpression() {
126127
ASTNodeUP lhs = ParsePrimaryExpression();
127-
while (CurToken().IsOneOf({Token::period, Token::arrow})) {
128+
while (CurToken().IsOneOf({Token::l_square, Token::period, Token::arrow})) {
129+
uint32_t loc = CurToken().GetLocation();
128130
Token token = CurToken();
129-
m_dil_lexer.Advance();
130-
Token member_token = CurToken();
131-
std::string member_id = ParseIdExpression();
132-
lhs = std::make_unique<MemberOfNode>(
133-
member_token.GetLocation(), std::move(lhs),
134-
token.GetKind() == Token::arrow, member_id);
131+
switch (token.GetKind()) {
132+
case Token::l_square: {
133+
m_dil_lexer.Advance();
134+
std::optional<int64_t> rhs = ParseIntegerConstant();
135+
if (!rhs) {
136+
BailOut(
137+
llvm::formatv("failed to parse integer constant: {0}", CurToken()),
138+
CurToken().GetLocation(), CurToken().GetSpelling().length());
139+
return std::make_unique<ErrorNode>();
140+
}
141+
Expect(Token::r_square);
142+
m_dil_lexer.Advance();
143+
lhs = std::make_unique<ArraySubscriptNode>(loc, std::move(lhs),
144+
std::move(*rhs));
145+
break;
146+
}
147+
case Token::period:
148+
case Token::arrow: {
149+
m_dil_lexer.Advance();
150+
Token member_token = CurToken();
151+
std::string member_id = ParseIdExpression();
152+
lhs = std::make_unique<MemberOfNode>(
153+
member_token.GetLocation(), std::move(lhs),
154+
token.GetKind() == Token::arrow, member_id);
155+
break;
156+
}
157+
default:
158+
llvm_unreachable("invalid token");
159+
}
135160
}
161+
136162
return lhs;
137163
}
138164

@@ -302,6 +328,23 @@ void DILParser::BailOut(const std::string &error, uint32_t loc,
302328
m_dil_lexer.ResetTokenIdx(m_dil_lexer.NumLexedTokens() - 1);
303329
}
304330

331+
// Parse a integer_literal.
332+
//
333+
// integer_literal:
334+
// ? Integer constant ?
335+
//
336+
std::optional<int64_t> DILParser::ParseIntegerConstant() {
337+
auto spelling = CurToken().GetSpelling();
338+
llvm::StringRef spelling_ref = spelling;
339+
int64_t raw_value;
340+
if (!spelling_ref.getAsInteger<int64_t>(0, raw_value)) {
341+
m_dil_lexer.Advance();
342+
return raw_value;
343+
}
344+
345+
return std::nullopt;
346+
}
347+
305348
void DILParser::Expect(Token::Kind kind) {
306349
if (CurToken().IsNot(kind)) {
307350
BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()),
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
CXX_SOURCES := main.cpp
2+
3+
include Makefile.rules

0 commit comments

Comments
 (0)