Skip to content

Commit 8eb8c92

Browse files
committed
[clangd] Add library to semantically strip flags by name.
Summary: This is designed for tweaking compile commands by specifying flags to add/remove in a config file. Something like: CompileFlags: { Remove: -fcolor-diagnostics } Having users tweak raw argv (e.g. with a regex) is going to end in tears: bugs around clang-cl, xclang, aliases, joined-vs-separate args etc are inevitable. This isn't in tooling because of the performance choices: build a big table up-front to make subsequent actions fast. Maybe it should be though. Reviewers: adamcz, hokein Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D81958
1 parent 8a24208 commit 8eb8c92

File tree

3 files changed

+467
-0
lines changed

3 files changed

+467
-0
lines changed

clang-tools-extra/clangd/CompileCommands.cpp

Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@
99
#include "CompileCommands.h"
1010
#include "Config.h"
1111
#include "support/Logger.h"
12+
#include "clang/Driver/Options.h"
1213
#include "clang/Frontend/CompilerInvocation.h"
1314
#include "clang/Tooling/ArgumentsAdjusters.h"
15+
#include "llvm/Option/Option.h"
16+
#include "llvm/Support/Allocator.h"
17+
#include "llvm/Support/Debug.h"
1418
#include "llvm/Support/FileSystem.h"
1519
#include "llvm/Support/FileUtilities.h"
1620
#include "llvm/Support/MemoryBuffer.h"
@@ -234,5 +238,269 @@ CommandMangler::operator clang::tooling::ArgumentsAdjuster() && {
234238
};
235239
}
236240

241+
// ArgStripper implementation
242+
namespace {
243+
244+
// Determine total number of args consumed by this option.
245+
// Return answers for {Exact, Prefix} match. 0 means not allowed.
246+
std::pair<unsigned, unsigned> getArgCount(const llvm::opt::Option &Opt) {
247+
constexpr static unsigned Rest = 10000; // Should be all the rest!
248+
// Reference is llvm::opt::Option::acceptInternal()
249+
using llvm::opt::Option;
250+
switch (Opt.getKind()) {
251+
case Option::FlagClass:
252+
return {1, 0};
253+
case Option::JoinedClass:
254+
case Option::CommaJoinedClass:
255+
return {1, 1};
256+
case Option::GroupClass:
257+
case Option::InputClass:
258+
case Option::UnknownClass:
259+
case Option::ValuesClass:
260+
return {1, 0};
261+
case Option::JoinedAndSeparateClass:
262+
return {2, 2};
263+
case Option::SeparateClass:
264+
return {2, 0};
265+
case Option::MultiArgClass:
266+
return {1 + Opt.getNumArgs(), 0};
267+
case Option::JoinedOrSeparateClass:
268+
return {2, 1};
269+
case Option::RemainingArgsClass:
270+
return {Rest, 0};
271+
case Option::RemainingArgsJoinedClass:
272+
return {Rest, Rest};
273+
}
274+
}
275+
276+
// Flag-parsing mode, which affects which flags are available.
// Every mode occupies its own bit, so a set of modes fits in one byte.
enum DriverMode : unsigned char {
  DM_None = 0,
  DM_GCC = 1 << 0, // Default mode e.g. when invoked as 'clang'
  DM_CL = 1 << 1,  // MS CL.exe compatible mode e.g. when invoked as 'clang-cl'
  DM_CC1 = 1 << 2, // When invoked as 'clang -cc1' or after '-Xclang'
  DM_All = DM_GCC | DM_CL | DM_CC1
};
284+
285+
// Examine args list to determine if we're in GCC, CL-compatible, or cc1 mode.
286+
DriverMode getDriverMode(const std::vector<std::string> &Args) {
287+
DriverMode Mode = DM_GCC;
288+
llvm::StringRef Argv0 = Args.front();
289+
if (Argv0.endswith_lower(".exe"))
290+
Argv0 = Argv0.drop_back(strlen(".exe"));
291+
if (Argv0.endswith_lower("cl"))
292+
Mode = DM_CL;
293+
for (const llvm::StringRef Arg : Args) {
294+
if (Arg == "--driver-mode=cl") {
295+
Mode = DM_CL;
296+
break;
297+
}
298+
if (Arg == "-cc1") {
299+
Mode = DM_CC1;
300+
break;
301+
}
302+
}
303+
return Mode;
304+
}
305+
306+
// Returns the set of DriverModes where an option may be used.
307+
unsigned char getModes(const llvm::opt::Option &Opt) {
308+
// Why is this so complicated?!
309+
// Reference is clang::driver::Driver::getIncludeExcludeOptionFlagMasks()
310+
unsigned char Result = DM_None;
311+
if (Opt.hasFlag(driver::options::CC1Option))
312+
Result |= DM_CC1;
313+
if (!Opt.hasFlag(driver::options::NoDriverOption)) {
314+
if (Opt.hasFlag(driver::options::CLOption)) {
315+
Result |= DM_CL;
316+
} else {
317+
Result |= DM_GCC;
318+
if (Opt.hasFlag(driver::options::CoreOption)) {
319+
Result |= DM_CL;
320+
}
321+
}
322+
}
323+
return Result;
324+
};
325+
326+
} // namespace
327+
328+
// Returns the stripping rules registered for the flag spelling Arg, or an
// empty list if Arg is not a spelling of any known option.
llvm::ArrayRef<ArgStripper::Rule> ArgStripper::rulesFor(llvm::StringRef Arg) {
  // The table is built once, when the function-local static is initialized.
  // It maps each flag spelling to the strings to look for and #args to skip.
  // e.g. "-x" => {-x 2 args, -x* 1 arg, --language 2 args, --language=* 1 arg}
  using TableTy =
      llvm::StringMap<llvm::SmallVector<Rule, 4>, llvm::BumpPtrAllocator>;
  static TableTy *Table = [] {
    auto &DriverTable = driver::getDriverOptTable();
    using DriverID = clang::driver::options::ID;

    // Chain aliased options together so -foo and -foo= act as synonyms.
    // Each chain is a doubly-linked list: PrevAlias[I] -> I -> NextAlias[I].
    // An option whose PrevAlias entry is INVALID is the canonical one.
    DriverID PrevAlias[DriverID::LastOption] = {DriverID::OPT_INVALID};
    DriverID NextAlias[DriverID::LastOption] = {DriverID::OPT_INVALID};
    auto AddAlias = [&](DriverID Self, DriverID T) {
      // Splice Self in directly after T, ahead of T's current successor.
      const DriverID Successor = NextAlias[T];
      if (Successor) {
        PrevAlias[Successor] = Self;
        NextAlias[Self] = Successor;
      }
      PrevAlias[Self] = T;
      NextAlias[T] = Self;
    };
    // Also record each option's prefix list, these are not fully exposed.
    // Entries never assigned by the OPTION expansion below stay null.
    const char *const *Prefixes[DriverID::LastOption] = {nullptr};
#define PREFIX(NAME, VALUE) static const char *const NAME[] = VALUE;
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
               HELP, METAVAR, VALUES)                                          \
  if (DriverID::OPT_##ALIAS != DriverID::OPT_INVALID && ALIASARGS == nullptr)  \
    AddAlias(DriverID::OPT_##ID, DriverID::OPT_##ALIAS);                       \
  Prefixes[DriverID::OPT_##ID] = PREFIX;
#include "clang/Driver/Options.inc"
#undef OPTION
#undef PREFIX

    auto Result = std::make_unique<TableTy>();
    // Visit each distinct option once, through its canonical alias.
    // All spellings of one option end up sharing the same rule set.
    for (unsigned ID = 1 /*Skip INVALID */; ID < DriverID::LastOption; ++ID) {
      if (PrevAlias[ID] || ID == DriverID::OPT_Xclang)
        continue; // Not canonical, or specially handled.
      llvm::SmallVector<Rule, 8> GroupRules;
      // Walk the alias chain, adding rules for parsing each member.
      for (unsigned A = ID; A != DriverID::OPT_INVALID; A = NextAlias[A]) {
        if (Prefixes[A] == nullptr) // option groups.
          continue;
        auto Opt = DriverTable.getOption(A);
        // Exclude - and -foo pseudo-options.
        if (Opt.getName().empty())
          continue;
        auto Modes = getModes(Opt);
        std::pair<unsigned, unsigned> ArgCount = getArgCount(Opt);
        // One rule per spelling of this alias, e.g. -foo vs --foo.
        for (auto *Prefix = Prefixes[A]; *Prefix != nullptr; ++Prefix) {
          llvm::SmallString<64> Buf(*Prefix);
          Buf.append(Opt.getName());
          // The table entry's key provides the storage for the rule's text.
          llvm::StringRef Spelling = Result->try_emplace(Buf).first->getKey();
          Rule NewRule;
          NewRule.Text = Spelling;
          NewRule.Modes = Modes;
          NewRule.ExactArgs = ArgCount.first;
          NewRule.PrefixArgs = ArgCount.second;
          // Concrete priority is the index into the option table.
          // Effectively, earlier entries take priority over later ones.
          assert(ID < std::numeric_limits<decltype(NewRule.Priority)>::max() &&
                 "Rules::Priority overflowed by options table");
          NewRule.Priority = ID;
          GroupRules.push_back(NewRule);
        }
      }
      // Register the whole rule set under each spelling that occurs in it.
      for (const auto &R : GroupRules)
        Result->find(R.Text)->second.append(GroupRules.begin(),
                                            GroupRules.end());
    }
#ifndef NDEBUG
    // Dump the table and various measures of its size.
    unsigned RuleCount = 0;
    dlog("ArgStripper Option spelling table");
    for (const auto &Entry : *Result) {
      dlog("{0}", Entry.first());
      RuleCount += Entry.second.size();
      for (const auto &R : Entry.second)
        dlog(" {0} #={1} *={2} Mode={3}", R.Text, R.ExactArgs, R.PrefixArgs,
             int(R.Modes));
    }
    dlog("Table spellings={0} rules={1} string-bytes={2}", Result->size(),
         RuleCount, Result->getAllocator().getBytesAllocated());
#endif
    // The static table will never be destroyed.
    return Result.release();
  }();

  const auto Found = Table->find(Arg);
  if (Found == Table->end())
    return {};
  return Found->second;
}
423+
424+
// Adds Arg to the set of things process() removes.
// A spelling found in the option table contributes that option's rules.
// Anything else is matched literally, where a trailing '*' turns the rest
// of the text into a prefix pattern.
void ArgStripper::strip(llvm::StringRef Arg) {
  auto OptionRules = rulesFor(Arg);
  if (!OptionRules.empty()) {
    Rules.append(OptionRules.begin(), OptionRules.end());
    return;
  }

  // Not a recognized flag. Strip it literally.
  // Max unsigned = lowest priority. Rules taken from the option table are
  // asserted to stay below this value, so it also identifies literal rules.
  const auto LiteralPriority = static_cast<decltype(Rule::Priority)>(-1);

  // Rule::Text of earlier literal rules points into strings held by Storage.
  // If the append below reallocates the vector, those strings move, and a
  // std::string may keep short text inside the string object itself, so the
  // old Text values would dangle. Detect that case and re-point them.
  const bool WillReallocate = Storage.size() == Storage.capacity();
  Storage.emplace_back(Arg);
  if (WillReallocate) {
    // Literal rules are created one per Storage entry, in the same order.
    // The newest Storage entry has no rule yet.
    const size_t OldCount = Storage.size() - 1;
    size_t Next = 0;
    for (Rule &R : Rules) {
      if (R.Priority != LiteralPriority)
        continue;
      assert(Next < OldCount && "literal rule without backing storage");
      // The length is preserved so a consumed trailing '*' stays dropped.
      R.Text = llvm::StringRef(Storage[Next++]).take_front(R.Text.size());
    }
  }

  Rules.emplace_back();
  Rule &Literal = Rules.back();
  Literal.Text = Storage.back();
  Literal.ExactArgs = 1;
  if (Literal.Text.consume_back("*"))
    Literal.PrefixArgs = 1;
  Literal.Modes = DM_All;
  Literal.Priority = LiteralPriority;
}
440+
441+
// Finds the rule that should delete Arg when parsing in driver mode Mode.
// Returns null if no rule applies. Otherwise ArgCount is set to the number
// of args the returned rule consumes; it is left untouched when null is
// returned.
const ArgStripper::Rule *ArgStripper::matchingRule(llvm::StringRef Arg,
                                                   unsigned Mode,
                                                   unsigned &ArgCount) const {
  const Rule *Winner = nullptr;
  for (const Rule &Candidate : Rules) {
    // A candidate is rejected when it is not applicable to the current
    // driver mode, is lower-priority than the best one found so far, or its
    // text is not a prefix of the arg.
    const bool ModeApplies = (Candidate.Modes & Mode) != 0;
    const bool Outranked = Winner && Winner->Priority < Candidate.Priority;
    if (!ModeApplies || Outranked || !Arg.startswith(Candidate.Text))
      continue;

    // An arg longer than the rule text is a prefix match, otherwise exact.
    // A count of zero means the rule does not allow that form of match.
    const unsigned Consumed = Arg.size() > Candidate.Text.size()
                                  ? Candidate.PrefixArgs
                                  : Candidate.ExactArgs;
    if (Consumed == 0)
      continue;

    Winner = &Candidate;
    ArgCount = Consumed;
    // Keep scanning in case a higher-priority rule follows.
  }
  return Winner;
}
463+
464+
void ArgStripper::process(std::vector<std::string> &Args) const {
465+
if (Args.empty())
466+
return;
467+
468+
// We're parsing the args list in some mode (e.g. gcc-compatible) but may
469+
// temporarily switch to another mode with the -Xclang flag.
470+
DriverMode MainMode = getDriverMode(Args);
471+
DriverMode CurrentMode = MainMode;
472+
473+
// Read and write heads for in-place deletion.
474+
unsigned Read = 0, Write = 0;
475+
bool WasXclang = false;
476+
while (Read < Args.size()) {
477+
unsigned ArgCount = 0;
478+
if (const Rule *R = matchingRule(Args[Read], CurrentMode, ArgCount)) {
479+
// Delete it and its args.
480+
if (WasXclang) {
481+
assert(Write > 0);
482+
--Write; // Drop previous -Xclang arg
483+
CurrentMode = MainMode;
484+
WasXclang = false;
485+
}
486+
// Advance to last arg. An arg may be foo or -Xclang foo.
487+
for (unsigned I = 1; Read < Args.size() && I < ArgCount; ++I) {
488+
++Read;
489+
if (Read < Args.size() && Args[Read] == "-Xclang")
490+
++Read;
491+
}
492+
} else {
493+
// No match, just copy the arg through.
494+
WasXclang = Args[Read] == "-Xclang";
495+
CurrentMode = WasXclang ? DM_CC1 : MainMode;
496+
if (Write != Read)
497+
Args[Write] = std::move(Args[Read]);
498+
++Write;
499+
}
500+
++Read;
501+
}
502+
Args.resize(Write);
503+
}
504+
237505
} // namespace clangd
238506
} // namespace clang

clang-tools-extra/clangd/CompileCommands.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,45 @@ struct CommandMangler {
5050
Memoize<llvm::StringMap<std::string>> ResolvedDriversNoFollow;
5151
};
5252

53+
// Removes args from a command-line in a semantically-aware way.
54+
//
55+
// Internally this builds a large (0.5MB) table of clang options on first use.
56+
// Both strip() and process() are fairly cheap after that.
57+
//
58+
// FIXME: this reimplements much of OptTable, it might be nice to expose more.
59+
// The table-building strategy may not make sense outside clangd.
60+
class ArgStripper {
61+
public:
62+
// Adds the arg to the set which should be removed.
63+
//
64+
// Recognized clang flags are stripped semantically. When "-I" is stripped:
65+
// - so is its value (either as -Ifoo or -I foo)
66+
// - aliases like --include-directory=foo are also stripped
67+
// - CL-style /Ifoo will be removed if the args indicate MS-compatible mode
68+
// Compile args not recognized as flags are removed literally, except:
69+
// - strip("ABC*") will remove any arg with an ABC prefix.
70+
//
71+
// In either case, the -Xclang prefix will be dropped if present.
72+
void strip(llvm::StringRef Arg);
73+
// Remove the targets from a compile command, in-place.
74+
void process(std::vector<std::string> &Args) const;
75+
76+
private:
77+
// Deletion rules, to be checked for each arg.
78+
struct Rule {
79+
llvm::StringRef Text; // Rule applies only if arg begins with Text.
80+
unsigned char Modes = 0; // Rule applies only in specified driver modes.
81+
uint16_t Priority = 0; // Lower is better.
82+
uint16_t ExactArgs = 0; // Num args consumed when Arg == Text.
83+
uint16_t PrefixArgs = 0; // Num args consumed when Arg starts with Text.
84+
};
85+
static llvm::ArrayRef<Rule> rulesFor(llvm::StringRef Arg);
86+
const Rule *matchingRule(llvm::StringRef Arg, unsigned Mode,
87+
unsigned &ArgCount) const;
88+
llvm::SmallVector<Rule, 4> Rules;
89+
std::vector<std::string> Storage; // Store strings not found in option table.
90+
};
91+
5392
} // namespace clangd
5493
} // namespace clang
5594

0 commit comments

Comments
 (0)