-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[ELF] Added struct Token and changed next() and peek() to return Token
#100180
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-lld @llvm/pr-subscribers-lld-elf Author: Hongyu Chen (yugier) Changes: This change addresses one of the issues filed for replacing the current linker script lexer with a fully stateful lexer. This change:
This change does not fully reflect the correct symbol/operator kind since our current Lexer is not stateful enough to support it; most of the symbols/operator checks are using StringRef comparison. The next steps:
Patch is 48.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100180.diff 4 Files Affected:
diff --git a/lld/ELF/ScriptLexer.cpp b/lld/ELF/ScriptLexer.cpp
index c8c02ab0f3e09..d4de18c417d8c 100644
--- a/lld/ELF/ScriptLexer.cpp
+++ b/lld/ELF/ScriptLexer.cpp
@@ -44,7 +44,7 @@ using namespace lld::elf;
// Returns a whole line containing the current token.
StringRef ScriptLexer::getLine() {
StringRef s = getCurrentMB().getBuffer();
- StringRef tok = tokens[pos - 1];
+ StringRef tok = tokens[pos - 1].val;
size_t pos = s.rfind('\n', tok.data() - s.data());
if (pos != StringRef::npos)
@@ -57,7 +57,7 @@ size_t ScriptLexer::getLineNumber() {
if (pos == 0)
return 1;
StringRef s = getCurrentMB().getBuffer();
- StringRef tok = tokens[pos - 1];
+ StringRef tok = tokens[pos - 1].val;
const size_t tokOffset = tok.data() - s.data();
// For the first token, or when going backwards, start from the beginning of
@@ -81,7 +81,7 @@ size_t ScriptLexer::getLineNumber() {
// Returns 0-based column number of the current token.
size_t ScriptLexer::getColumnNumber() {
- StringRef tok = tokens[pos - 1];
+ StringRef tok = tokens[pos - 1].val;
return tok.data() - getLine().data();
}
@@ -90,6 +90,22 @@ std::string ScriptLexer::getCurrentLocation() {
return (filename + ":" + Twine(getLineNumber())).str();
}
+std::string ScriptLexer::joinTokens(size_t begin, size_t end) {
+ auto itBegin = tokens.begin() + begin;
+ auto itEnd = tokens.begin() + end;
+
+ std::string S;
+ if (itBegin == itEnd)
+ return S;
+
+ S += (*itBegin).val;
+ while (++itBegin != itEnd) {
+ S += " ";
+ S += (*itBegin).val;
+ }
+ return S;
+}
+
ScriptLexer::ScriptLexer(MemoryBufferRef mb) { tokenize(mb); }
// We don't want to record cascading errors. Keep only the first one.
@@ -106,7 +122,7 @@ void ScriptLexer::setError(const Twine &msg) {
// Split S into linker script tokens.
void ScriptLexer::tokenize(MemoryBufferRef mb) {
- std::vector<StringRef> vec;
+ std::vector<Token> vec;
mbs.push_back(mb);
StringRef s = mb.getBuffer();
StringRef begin = s;
@@ -129,20 +145,19 @@ void ScriptLexer::tokenize(MemoryBufferRef mb) {
return;
}
- vec.push_back(s.take_front(e + 1));
+ vec.push_back({Kind::Quote, s.take_front(e + 1)});
s = s.substr(e + 1);
continue;
}
-
// Some operators form separate tokens.
if (s.starts_with("<<=") || s.starts_with(">>=")) {
- vec.push_back(s.substr(0, 3));
+ vec.push_back(getOperatorToken(s));
s = s.substr(3);
continue;
}
if (s.size() > 1 && ((s[1] == '=' && strchr("*/+-<>&^|", s[0])) ||
(s[0] == s[1] && strchr("<>&|", s[0])))) {
- vec.push_back(s.substr(0, 2));
+ vec.push_back(getOperatorToken(s));
s = s.substr(2);
continue;
}
@@ -155,15 +170,199 @@ void ScriptLexer::tokenize(MemoryBufferRef mb) {
// A character that cannot start a word (which is usually a
// punctuation) forms a single character token.
- if (pos == 0)
+ if (pos == 0) {
pos = 1;
- vec.push_back(s.substr(0, pos));
+ vec.push_back(getOperatorToken(s));
+ } else {
+ vec.push_back(getKeywordorIdentifier(s.substr(0, pos)));
+ }
s = s.substr(pos);
}
tokens.insert(tokens.begin() + pos, vec.begin(), vec.end());
}
+ScriptLexer::Token ScriptLexer::getOperatorToken(StringRef s) {
+ auto createToken = [&](Kind kind, size_t pos) -> Token {
+ return {kind, s.substr(0, pos)};
+ };
+
+ switch (s.front()) {
+ case EOF:
+ return createToken(Kind::Eof, 0);
+ case '(':
+ return createToken(Kind::BracektBegin, 1);
+ case ')':
+ return createToken(Kind::BracektEnd, 1);
+ case '{':
+ return createToken(Kind::CurlyBegin, 1);
+ case '}':
+ return createToken(Kind::CurlyEnd, 1);
+ case ';':
+ return createToken(Kind::Semicolon, 1);
+ case ',':
+ return createToken(Kind::Comma, 1);
+ case ':':
+ return createToken(Kind::Colon, 1);
+ case '?':
+ return createToken(Kind::Question, 1);
+ case '%':
+ return createToken(Kind::Percent, 1);
+ case '!':
+ if (s.size() > 1 && s[1] == '=')
+ return createToken(Kind::NotEqual, 2);
+ return createToken(Kind::Excalamation, 1);
+ case '*':
+ if (s.size() > 1 && s[1] == '=')
+ return createToken(Kind::MulAssign, 2);
+ return createToken(Kind::Asterisk, 1);
+ case '/':
+ if (s.size() > 1 && s[1] == '=')
+ return createToken(Kind::DivAssign, 2);
+ return createToken(Kind::Slash, 1);
+ case '=':
+ if (s.size() > 1 && s[1] == '=')
+ return createToken(Kind::Equal, 2);
+ return createToken(Kind::Assign, 1);
+ case '+':
+ if (s.size() > 1 && s[1] == '=')
+ return createToken(Kind::PlusAssign, 2);
+ return createToken(Kind::Plus, 1);
+ case '-':
+ if (s.size() > 1 && s[1] == '=')
+ return createToken(Kind::MinusAssign, 2);
+ return createToken(Kind::Minus, 1);
+ case '<':
+ if (s.size() > 2 && s[1] == s[0] && s[2] == '=')
+ return createToken(Kind::LeftShiftAssign, 3);
+ if (s.size() > 1) {
+ if (s[1] == '=')
+ return createToken(Kind::LessEqual, 2);
+ if (s[1] == '<')
+ return createToken(Kind::LeftShift, 2);
+ }
+ return createToken(Kind::Less, 1);
+ case '>':
+ if (s.size() > 2 && s[1] == s[0] && s[2] == '=')
+ return createToken(Kind::RightShiftAssign, 3);
+ if (s.size() > 1) {
+ if (s[1] == '=')
+ return createToken(Kind::GreaterEqual, 2);
+ if (s[1] == '>')
+ return createToken(Kind::RightShift, 2);
+ }
+ return createToken(Kind::Greater, 1);
+ case '&':
+ if (s.size() > 1) {
+ if (s[1] == '=')
+ return createToken(Kind::AndAssign, 2);
+ if (s[1] == '&')
+ return createToken(Kind::AndGate, 2);
+ }
+ return createToken(Kind::Bitwise, 1);
+ case '^':
+ if (s.size() > 1 && s[1] == '=')
+ return createToken(Kind::XorAssign, 2);
+ return createToken(Kind::Xor, 1);
+ case '|':
+ if (s.size() > 1) {
+ if (s[1] == '=')
+ return createToken(Kind::OrAssign, 2);
+ if (s[1] == '|')
+ return createToken(Kind::OrGate, 2);
+ }
+ return createToken(Kind::Or, 1);
+ case '.':
+ return createToken(Kind::Dot, 1);
+ case '_':
+ return createToken(Kind::Underscore, 1);
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return createToken(Kind::Decimal, 1);
+ default:
+ return {Kind::Identifier, s};
+ }
+}
+
+ScriptLexer::Token ScriptLexer::getKeywordorIdentifier(StringRef s) {
+ static const std::unordered_map<std::string, Kind> keywords = {
+ {"ENTRY", Kind::Entry},
+ {"INPUT", Kind::Input},
+ {"GROUP", Kind::Group},
+ {"INCLUDE", Kind::Include},
+ {"MEMORY", Kind::Memory},
+ {"OUTPUT", Kind::Output},
+ {"SEARCH_DIR", Kind::SearchDir},
+ {"STARTUP", Kind::Startup},
+ {"INSERT", Kind::Insert},
+ {"AFTER", Kind::After},
+ {"OUTPUT_FORMAT", Kind::OutputFormat},
+ {"TARGET", Kind::Target},
+ {"ASSERT", Kind::Assert},
+ {"CONSTANT", Kind::Constant},
+ {"EXTERN", Kind::Extern},
+ {"OUTPUT_ARCH", Kind::OutputArch},
+ {"NOCROSSREFS", Kind::Nocrossrefs},
+ {"NOCROSSREFS_TO", Kind::NocrossrefsTo},
+ {"PROVIDE", Kind::Provide},
+ {"HIDDEN", Kind::Hidden},
+ {"PROVIDE_HIDDEN", Kind::ProvideHidden},
+ {"SECTIONS", Kind::Sections},
+ {"BEFORE", Kind::Before},
+ {"EXCLUDE_FILE", Kind::ExcludeFile},
+ {"KEEP", Kind::Keep},
+ {"INPUT_SECTION_FLAGS", Kind::InputSectionFlags},
+ {"OVERLAY", Kind::Overlay},
+ {"NOLOAD", Kind::Noload},
+ {"COPY", Kind::Copy},
+ {"INFO", Kind::Info},
+ {"OVERWRITE_SECTIONS", Kind::OverwriteSections},
+ {"SUBALIGN", Kind::Subalign},
+ {"ONLY_IF_RO", Kind::OnlyIfRO},
+ {"ONLY_IF_RW", Kind::OnlyIfRW},
+ {"FILL", Kind::Fill},
+ {"SORT", Kind::Sort},
+ {"ABSOLUTE", Kind::Absolute},
+ {"ADDR", Kind::Addr},
+ {"ALIGN", Kind::Align},
+ {"ALIGNOF", Kind::Alignof},
+ {"DATA_SEGMENT_ALIGN", Kind::DataSegmentAlign},
+ {"DATA_SEGMENT_END", Kind::DataSegmentEnd},
+ {"DATA_SEGMENT_RELRO_END", Kind::DataSegmentRelroEnd},
+ {"DEFINED", Kind::Defined},
+ {"LENGTH", Kind::Length},
+ {"LOADADDR", Kind::Loadaddr},
+ {"LOG2CEIL", Kind::Log2ceil},
+ {"MAX", Kind::Max},
+ {"MIN", Kind::Min},
+ {"ORIGIN", Kind::Origin},
+ {"SEGMENT_START", Kind::SegmentStart},
+ {"SIZEOF", Kind::Sizeof},
+ {"SIZEOF_HEADERS", Kind::SizeofHeaders},
+ {"FILEHDR", Kind::Filehdr},
+ {"PHDRS", Kind::Phdrs},
+ {"AT", Kind::At},
+ {"FLAGS", Kind::Flags},
+ {"VERSION", Kind::Version},
+ {"REGION_ALIAS", Kind::RegionAlias},
+ {"AS_NEEDED", Kind::AsNeeded},
+ {"CONSTRUCTORS", Kind::Constructors},
+ {"MAXPAGESIZE", Kind::Maxpagesize},
+ {"COMMONPAGESIZE", Kind::Commonpagesize}};
+ auto it = keywords.find(s.str());
+ if (it != keywords.end())
+ return {it->second, s};
+ return {Kind::Identifier, s};
+}
+
// Skip leading whitespace characters or comments.
StringRef ScriptLexer::skipSpace(StringRef s) {
for (;;) {
@@ -195,37 +394,37 @@ bool ScriptLexer::atEOF() { return errorCount() || tokens.size() == pos; }
// Split a given string as an expression.
// This function returns "3", "*" and "5" for "3*5" for example.
-static std::vector<StringRef> tokenizeExpr(StringRef s) {
+std::vector<ScriptLexer::Token> ScriptLexer::tokenizeExpr(StringRef s) {
StringRef ops = "!~*/+-<>?^:="; // List of operators
// Quoted strings are literal strings, so we don't want to split it.
if (s.starts_with("\""))
- return {s};
+ return {{Kind::Quote, s}};
// Split S with operators as separators.
- std::vector<StringRef> ret;
+ std::vector<ScriptLexer::Token> ret;
while (!s.empty()) {
size_t e = s.find_first_of(ops);
// No need to split if there is no operator.
if (e == StringRef::npos) {
- ret.push_back(s);
+ ret.push_back({Kind::Identifier, s});
break;
}
// Get a token before the operator.
if (e != 0)
- ret.push_back(s.substr(0, e));
+ ret.push_back({Kind::Identifier, s.substr(0, e)});
// Get the operator as a token.
// Keep !=, ==, >=, <=, << and >> operators as a single tokens.
if (s.substr(e).starts_with("!=") || s.substr(e).starts_with("==") ||
s.substr(e).starts_with(">=") || s.substr(e).starts_with("<=") ||
s.substr(e).starts_with("<<") || s.substr(e).starts_with(">>")) {
- ret.push_back(s.substr(e, 2));
+ ret.push_back(getOperatorToken(s.substr(e)));
s = s.substr(e + 2);
} else {
- ret.push_back(s.substr(e, 1));
+ ret.push_back(getOperatorToken(s.substr(e, 1)));
s = s.substr(e + 1);
}
}
@@ -242,32 +441,29 @@ static std::vector<StringRef> tokenizeExpr(StringRef s) {
//
// This function may split the current token into multiple tokens.
void ScriptLexer::maybeSplitExpr() {
- if (!inExpr || errorCount() || atEOF())
- return;
-
- std::vector<StringRef> v = tokenizeExpr(tokens[pos]);
+ std::vector<Token> v = tokenizeExpr(tokens[pos].val);
if (v.size() == 1)
return;
tokens.erase(tokens.begin() + pos);
tokens.insert(tokens.begin() + pos, v.begin(), v.end());
}
-StringRef ScriptLexer::next() {
- maybeSplitExpr();
-
+ScriptLexer::Token ScriptLexer::next() {
if (errorCount())
- return "";
+ return {Kind::Error, ""};
if (atEOF()) {
setError("unexpected EOF");
- return "";
+ return {Kind::Eof, ""};
}
+ if (inExpr)
+ maybeSplitExpr();
return tokens[pos++];
}
-StringRef ScriptLexer::peek() {
- StringRef tok = next();
+ScriptLexer::Token ScriptLexer::peek() {
+ Token tok = next();
if (errorCount())
- return "";
+ return {Kind::Error, ""};
pos = pos - 1;
return tok;
}
@@ -283,8 +479,8 @@ bool ScriptLexer::consume(StringRef tok) {
bool ScriptLexer::consumeLabel(StringRef tok) {
if (consume((tok + ":").str()))
return true;
- if (tokens.size() >= pos + 2 && tokens[pos] == tok &&
- tokens[pos + 1] == ":") {
+ if (tokens.size() >= pos + 2 && tokens[pos].val == tok &&
+ tokens[pos + 1].val == ":") {
pos += 2;
return true;
}
@@ -296,9 +492,9 @@ void ScriptLexer::skip() { (void)next(); }
void ScriptLexer::expect(StringRef expect) {
if (errorCount())
return;
- StringRef tok = next();
+ Token tok = next();
if (tok != expect)
- setError(expect + " expected, but got " + tok);
+ setError(expect + " expected, but got " + tok.val);
}
// Returns true if S encloses T.
@@ -312,7 +508,7 @@ MemoryBufferRef ScriptLexer::getCurrentMB() {
if (pos == 0)
return mbs.back();
for (MemoryBufferRef mb : mbs)
- if (encloses(mb.getBuffer(), tokens[pos - 1]))
+ if (encloses(mb.getBuffer(), tokens[pos - 1].val))
return mb;
llvm_unreachable("getCurrentMB: failed to find a token");
}
diff --git a/lld/ELF/ScriptLexer.h b/lld/ELF/ScriptLexer.h
index d5393818ed553..b98635ef0aeb2 100644
--- a/lld/ELF/ScriptLexer.h
+++ b/lld/ELF/ScriptLexer.h
@@ -9,6 +9,7 @@
#ifndef LLD_ELF_SCRIPT_LEXER_H
#define LLD_ELF_SCRIPT_LEXER_H
+#include "ScriptToken.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBufferRef.h"
@@ -18,14 +19,22 @@ namespace lld::elf {
class ScriptLexer {
public:
+ struct Token {
+ Kind kind;
+ StringRef val;
+ inline bool operator==(StringRef other) { return val == other; }
+
+ inline bool operator!=(StringRef other) { return val != other; }
+ };
+
explicit ScriptLexer(MemoryBufferRef mb);
void setError(const Twine &msg);
void tokenize(MemoryBufferRef mb);
StringRef skipSpace(StringRef s);
bool atEOF();
- StringRef next();
- StringRef peek();
+ Token next();
+ Token peek();
void skip();
bool consume(StringRef tok);
void expect(StringRef expect);
@@ -34,7 +43,8 @@ class ScriptLexer {
MemoryBufferRef getCurrentMB();
std::vector<MemoryBufferRef> mbs;
- std::vector<StringRef> tokens;
+ std::vector<Token> tokens;
+ std::string joinTokens(size_t begin, size_t end);
bool inExpr = false;
size_t pos = 0;
@@ -46,6 +56,10 @@ class ScriptLexer {
StringRef getLine();
size_t getLineNumber();
size_t getColumnNumber();
+
+ Token getOperatorToken(StringRef s);
+ Token getKeywordorIdentifier(StringRef s);
+ std::vector<ScriptLexer::Token> tokenizeExpr(StringRef s);
};
} // namespace lld::elf
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index 49aa7e6374905..bae5ae3d1e7e7 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -109,7 +109,7 @@ class ScriptParser final : ScriptLexer {
SortSectionPolicy peekSortKind();
SortSectionPolicy readSortKind();
SymbolAssignment *readProvideHidden(bool provide, bool hidden);
- SymbolAssignment *readAssignment(StringRef tok);
+ SymbolAssignment *readAssignment(ScriptLexer::Token tok);
void readSort();
Expr readAssert();
Expr readConstant();
@@ -119,7 +119,7 @@ class ScriptParser final : ScriptLexer {
void readMemoryAttributes(uint32_t &flags, uint32_t &invFlags,
uint32_t &negFlags, uint32_t &negInvFlags);
- Expr combine(StringRef op, Expr l, Expr r);
+ Expr combine(ScriptLexer::Token op, Expr l, Expr r);
Expr readExpr();
Expr readExpr1(Expr lhs, int minPrec);
StringRef readParenLiteral();
@@ -153,6 +153,12 @@ static StringRef unquote(StringRef s) {
return s;
}
+static StringRef unquote(ScriptLexer::Token tok) {
+ if (tok.val.starts_with("\""))
+ return tok.val.substr(1, tok.val.size() - 2);
+ return tok.val;
+}
+
// Some operations only support one non absolute value. Move the
// absolute one to the right hand side for convenience.
static void moveAbsRight(ExprValue &a, ExprValue &b) {
@@ -201,7 +207,7 @@ void ScriptParser::readDynamicList() {
expect(";");
if (!atEOF()) {
- setError("EOF expected, but got " + next());
+ setError("EOF expected, but got " + next().val);
return;
}
if (!locals.empty()) {
@@ -216,7 +222,7 @@ void ScriptParser::readDynamicList() {
void ScriptParser::readVersionScript() {
readVersionScriptCommand();
if (!atEOF())
- setError("EOF expected, but got " + next());
+ setError("EOF expected, but got " + next().val);
}
void ScriptParser::readVersionScriptCommand() {
@@ -226,14 +232,14 @@ void ScriptParser::readVersionScriptCommand() {
}
while (!atEOF() && !errorCount() && peek() != "}") {
- StringRef verStr = next();
- if (verStr == "{") {
+ ScriptLexer::Token verTok = next();
+ if (verTok.kind == Kind::CurlyBegin) {
setError("anonymous version definition is used in "
"combination with other version definitions");
return;
}
expect("{");
- readVersionDeclaration(verStr);
+ readVersionDeclaration(verTok.val);
}
}
@@ -245,50 +251,71 @@ void ScriptParser::readVersion() {
void ScriptParser::readLinkerScript() {
while (!atEOF()) {
- StringRef tok = next();
- if (tok == ";")
+ ScriptLexer::Token tok = next();
+ if (tok.kind == Kind::Semicolon)
continue;
- if (tok == "ENTRY") {
+ switch (tok.kind) {
+ case Kind::Entry:
readEntry();
- } else if (tok == "EXTERN") {
+ break;
+ case Kind::Extern:
readExtern();
- } else if (tok == "GROUP") {
+ break;
+ case Kind::Group:
readGroup();
- } else if (tok == "INCLUDE") {
+ break;
+ case Kind::Include:
readInclude();
- } else if (tok == "INPUT") {
+ break;
+ case Kind::Input:
readInput();
- } else if (tok == "MEMORY") {
+ break;
+ case Kind::Memory:
readMemory();
- } else if (tok == "OUTPUT") {
+ break;
+ case Kind::Output:
readOutput();
- } else if (tok == "OUTPUT_ARCH") {
+ break;
+ case Kind::OutputArch:
readOutputArch();
- } else if (tok == "OUTPUT_FORMAT") {
+ break;
+ case Kind::OutputFormat:
readOutputFormat();
- } else if (tok == "OVERWRITE_SECTIONS") {
+ break;
+ case Kind::OverwriteSections:
readOverwriteSections();
- } else if (tok == "PHDRS") {
+ break;
+ case Kind::Phdrs:
readPhdrs();
- } else if (tok == "REGION_ALIAS") {
+ break;
+ case Kind::RegionAlias:
readRegionAlias();
- } else if (tok == "SEARCH_DIR") {
+ break;
+ case Kind::SearchDir:
readSearchDir();
- } else if (tok == "SECTIONS") {
+ break;
+ case Kind::Sections:
readSections();
- } else if (tok == "TARGET") {
+ break;
+ case Kind::Target:
readTarget();
- } else if (tok == "VERSION") {
+ break;
+ case Kind::Version:
readVersion();
- } else if (tok == "NOCROSSREFS") {
+ break;
+ case Kind::Nocrossrefs:
readNoCrossRefs(/*to=*/false);
- } else if (tok == "NOCROSSREFS_TO") {
+ break;
+ case Kind::NocrossrefsTo:
readNoCrossRefs(/*to=*/true);
- } else if (SymbolAssignment *cmd = readAssignment(tok)) {
- script->sectionCommands.push_back(cmd);
- } else {
- setError("unknown directive: " + tok);
+ break;
+ default:
+ if (SymbolAssignment *cmd = readAssignment(tok)) {
+ script->sectionCommands.push_back(cmd);
+ } else {
+ setError("unknown directive: " + tok.val);
+ }
}
}
}
@@ -298,7 +325,7 @@ void ScriptParser::readDefsym(StringRef name) {
return;
Expr e = readExpr();
if (!atEOF())
- setError("EOF expected, but got " + next());
+ setError("EOF expected, but got " + next().val);
auto *cmd = make<SymbolAssignment>(
name, e, 0, getCurrentMB().getBufferIdentifier().str());
script->sectionCommands.push_back(cmd);
@@ -376,7 +403,7 @@ void ScriptParser::readAsNeeded() {
void ScriptParser::readEntry() {
// -e <symbol> takes predecence over ENTRY(<symbol>).
expect("(");
- StringRef tok = next();
+ StringRef tok = next().val;
if (config->entry.empty())
config->entry = unquote(tok);
expect(")");
@@ -426,7 +453,7 @@ void ScriptParser::readInput() {
void ScriptParser::readOutput() {
// -o <file> takes predecence over OUTPUT(<file>).
expect("(");
- StringRef tok = next();
+ StringRef tok = next().val;
if (config->outputF...
[truncated]
|
lld/ELF/ScriptToken.h
Outdated
|
||
namespace lld { | ||
namespace elf { | ||
enum class Kind { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Kind could cause ambiguity. I am open to suggestions, but if we aim for conciseness, Tok is a nice choice.
The token kinds will be used a lot, so a short enum name is useful.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, agreed. Updated Kind to Tok!
lld/ELF/ScriptLexer.cpp
Outdated
if (itBegin == itEnd) | ||
return S; | ||
|
||
S += (*itBegin).val; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: itBegin->val
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OKie!
lld/ELF/ScriptLexer.cpp
Outdated
S += (*itBegin).val; | ||
while (++itBegin != itEnd) { | ||
S += " "; | ||
S += (*itBegin).val; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: same
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
@@ -90,6 +90,22 @@ std::string ScriptLexer::getCurrentLocation() { | |||
return (filename + ":" + Twine(getLineNumber())).str(); | |||
} | |||
|
|||
std::string ScriptLexer::joinTokens(size_t begin, size_t end) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This function may no longer be needed once the lexer is fully stateful. I plan to keep it as a temporary solution for printing the StringRef values from struct Token.
lld/ELF/ScriptLexer.cpp
Outdated
} | ||
|
||
ScriptLexer::Token ScriptLexer::getKeywordorIdentifier(StringRef s) { | ||
static const std::unordered_map<std::string, Kind> keywords = { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could be made a static constexpr global instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, prefer to use llvm::StringMap.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes updated it to llvm::StringMap
Could you please help me check whether the updated approach is better? Thank you!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
StringMap allocates memory to hold the string, which is wasteful when we can use string literals. DenseMap<CachedHashStringRef, X> might be better.
lld/ELF/ScriptToken.h
Outdated
DecimalM, // end with M/m | ||
|
||
// Symbol tokens | ||
CurlyBegin, // { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the absence of other concerns, these should probably follow the Unicode naming convention for these symbols:
Left/right curly bracket
Left/right parenthesis
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated!
lld/ELF/ScriptToken.h
Outdated
Less, // < | ||
Minus, // - | ||
Plus, // + | ||
Bitwise, // & |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BitwiseAnd,
BitwiseXor
BitwiseOr
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!
lld/ELF/ScriptToken.h
Outdated
LessEqual, // <= | ||
LeftShift, // << | ||
RightShift, // >> | ||
AndGate, // && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And,
Or,
"Gate" is uncommon in software contexts.
Alternatively,
LogicalAnd,
LogicalOr
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes updated thank you!
s = s.substr(e + 1); | ||
continue; | ||
} | ||
|
||
// Some operators form separate tokens. | ||
if (s.starts_with("<<=") || s.starts_with(">>=")) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's a good deal of overlap between the logic that wraps these clauses and the logic in getOperatorToken(). Could we split getOperatorToken() into a family of functions that recognizes each of these classes of tokens, returning an optional token? That would allow the logic to be handled only once, and the style in getOperatorToken() is definitely the more readable of the two mechanisms. We could probably also break out the quoted string recognition above into a similar function.
lld/ELF/ScriptLexer.cpp
Outdated
{"INFO", Tok::Info}, | ||
{"OVERWRITE_SECTIONS", Tok::OverwriteSections}, | ||
{"SUBALIGN", Tok::Subalign}, | ||
{"ONLY_IF_RO", Tok::OnlyIfRO}, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OnlyIfRo, and OnlyIfRw to follow the naming convention of the others (underscores separating words).
lld/ELF/ScriptLexer.cpp
Outdated
{"MAXPAGESIZE", Tok::Maxpagesize}, | ||
{"COMMONPAGESIZE", Tok::Commonpagesize}}; | ||
|
||
ScriptLexer::Token ScriptLexer::getKeywordorIdentifier(StringRef s) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
getKeywordOrIdentifier (capitalize the "O").
lld/ELF/ScriptLexer.h
Outdated
struct Token { | ||
Tok kind; | ||
StringRef val; | ||
inline bool operator==(StringRef other) { return val == other; } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
inline is redundant here and below; function definitions within class definitions are implicitly inline.
lld/ELF/ScriptParser.cpp
Outdated
|
||
// Tok is a symbol name. | ||
StringRef tokVal = tok.val; | ||
if (tokVal.starts_with("\"")) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: tokVal.starts_with('"')
The lexer representation should probably be made stateful (and remove I'll take a stab at removing |
Sounds good! I will have a new branch and PR for remove |
This change is reflecting one of the issues to replace current linker script lexer with a fully stateful lexer.
This change:
- Adds struct Token{Kind kind, StringRef val}.
- Replaces vector<StringRef> tokens with vector<Token> tokens.
- Changes peek() and next() to return Token.
This change does not fully reflect the correct symbol/operator kind since our current Lexer is not stateful enough to support it; most of the symbols/operator checks are using StringRef comparison. The next steps:
should be able to support symbol/operator kind once they are done.