Skip to content

Commit 49fd778

Browse files
committed
Comment parsing: add argument parsing for @throw @throws @exception
Doxygen allows for the @throw, @throws, and @exception commands to have an attached argument indicating the type being thrown. Currently, Clang's AST parsing doesn't support parsing out this argument from doc comments. The result is missing compatibility with Doxygen. We would find it helpful if the AST exposed these thrown types as BlockCommandComment arguments so that we could generate better documentation. This PR implements parsing of arguments for the @throw, @throws, and @exception commands. Each command can only have one argument, matching the semantics of Doxygen. We have also added unit tests to validate the functionality.
1 parent dbe63e3 commit 49fd778

File tree

4 files changed

+373
-4
lines changed

4 files changed

+373
-4
lines changed

clang/include/clang/AST/CommentCommands.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,9 @@ def Tparam : BlockCommand<"tparam"> { let IsTParamCommand = 1; }
132132
// HeaderDoc command for template parameter documentation.
133133
def Templatefield : BlockCommand<"templatefield"> { let IsTParamCommand = 1; }
134134

135-
def Throws : BlockCommand<"throws"> { let IsThrowsCommand = 1; }
136-
def Throw : BlockCommand<"throw"> { let IsThrowsCommand = 1; }
137-
def Exception : BlockCommand<"exception"> { let IsThrowsCommand = 1; }
135+
// \throws, \throw and \exception each accept a single argument naming the
// thrown type, matching Doxygen.
def Throws : BlockCommand<"throws"> {
  let IsThrowsCommand = 1;
  let NumArgs = 1;
}
def Throw : BlockCommand<"throw"> {
  let IsThrowsCommand = 1;
  let NumArgs = 1;
}
def Exception : BlockCommand<"exception"> {
  let IsThrowsCommand = 1;
  let NumArgs = 1;
}
138138

139139
def Deprecated : BlockCommand<"deprecated"> {
140140
let IsEmptyParagraphAllowed = 1;

clang/include/clang/AST/CommentParser.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,9 @@ class Parser {
100100
ArrayRef<Comment::Argument>
101101
parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);
102102

103+
/// Parse the arguments of a \\throw, \\throws or \\exception block command.
///
/// \param Retokenizer source of the text following the command.
/// \param NumArgs maximum number of arguments to parse.
/// \returns the arguments that were actually parsed (possibly fewer than
/// \p NumArgs).
ArrayRef<Comment::Argument>
104+
parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);
105+
103106
BlockCommandComment *parseBlockCommand();
104107
InlineCommandComment *parseInlineCommand();
105108

clang/lib/AST/CommentParser.cpp

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,25 @@ class TextTokenRetokenizer {
7575
return *Pos.BufferPtr;
7676
}
7777

78+
/// Look at the character \p offset positions past the current one without
/// consuming anything.
///
/// Only the current buffer (Pos.BufferPtr up to Pos.BufferEnd) is inspected.
///
/// \returns the character at that position, or '\0' when the position lies
/// outside the current buffer.
char peekNext(unsigned offset) const {
  assert(!isEnd());
  assert(Pos.BufferPtr != Pos.BufferEnd);
  // BufferEnd is treated as one past the last character (the assert above
  // rules out BufferPtr == BufferEnd before any dereference), so the offset
  // must be strictly smaller than the remaining length. Comparing lengths
  // instead of pointers also avoids forming a pointer beyond the buffer.
  const size_t Remaining = static_cast<size_t>(Pos.BufferEnd - Pos.BufferPtr);
  if (offset < Remaining)
    return Pos.BufferPtr[offset];
  return '\0';
}
87+
88+
/// Append to \p WordText the run of non-whitespace characters that begins one
/// character after the current position, without consuming any input.
///
/// Scanning stops at the first whitespace character or when peekNext()
/// reports '\0'. \p WordText is appended to, not cleared.
void peekNextToken(SmallString<32> &WordText) const {
  for (unsigned Ahead = 1;; ++Ahead) {
    const char Next = peekNext(Ahead);
    if (Next == '\0' || isWhitespace(Next))
      return;
    WordText.push_back(Next);
  }
}
96+
7897
void consumeChar() {
7998
assert(!isEnd());
8099
assert(Pos.BufferPtr != Pos.BufferEnd);
@@ -89,6 +108,29 @@ class TextTokenRetokenizer {
89108
}
90109
}
91110

111+
/// Extract a template type
112+
bool lexTemplateType(SmallString<32> &WordText) {
113+
unsigned IncrementCounter = 0;
114+
while (!isEnd()) {
115+
const char C = peek();
116+
WordText.push_back(C);
117+
consumeChar();
118+
switch (C) {
119+
default:
120+
break;
121+
case '<': {
122+
IncrementCounter++;
123+
} break;
124+
case '>': {
125+
IncrementCounter--;
126+
if (!IncrementCounter)
127+
return true;
128+
} break;
129+
}
130+
}
131+
return false;
132+
}
133+
92134
/// Add a token.
93135
/// Returns true on success, false if there are no interesting tokens to
94136
/// fetch from lexer.
@@ -149,6 +191,76 @@ class TextTokenRetokenizer {
149191
addToken();
150192
}
151193

194+
/// Extract a type argument
195+
bool lexDataType(Token &Tok) {
196+
if (isEnd())
197+
return false;
198+
Position SavedPos = Pos;
199+
consumeWhitespace();
200+
SmallString<32> NextToken;
201+
SmallString<32> WordText;
202+
const char *WordBegin = Pos.BufferPtr;
203+
SourceLocation Loc = getSourceLocation();
204+
StringRef ConstVal = StringRef("const");
205+
bool ConstPointer = false;
206+
207+
while (!isEnd()) {
208+
const char C = peek();
209+
if (!isWhitespace(C)) {
210+
if (C == '<') {
211+
if (!lexTemplateType(WordText))
212+
return false;
213+
} else {
214+
WordText.push_back(C);
215+
consumeChar();
216+
}
217+
} else {
218+
if (WordText.equals(ConstVal)) {
219+
WordText.push_back(C);
220+
consumeChar();
221+
} else if (WordText.ends_with(StringRef("*")) ||
222+
WordText.ends_with(StringRef("&"))) {
223+
NextToken.clear();
224+
peekNextToken(NextToken);
225+
if (NextToken.equals(ConstVal)) {
226+
ConstPointer = true;
227+
WordText.push_back(C);
228+
consumeChar();
229+
} else {
230+
consumeChar();
231+
break;
232+
}
233+
} else {
234+
NextToken.clear();
235+
peekNextToken(NextToken);
236+
if ((NextToken.ends_with(StringRef("*")) ||
237+
NextToken.ends_with(StringRef("&"))) &&
238+
!ConstPointer) {
239+
WordText.push_back(C);
240+
consumeChar();
241+
} else {
242+
consumeChar();
243+
break;
244+
}
245+
}
246+
}
247+
}
248+
249+
const unsigned Length = WordText.size();
250+
if (Length == 0) {
251+
Pos = SavedPos;
252+
return false;
253+
}
254+
255+
char *TextPtr = Allocator.Allocate<char>(Length + 1);
256+
257+
memcpy(TextPtr, WordText.c_str(), Length + 1);
258+
StringRef Text = StringRef(TextPtr, Length);
259+
260+
formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
261+
return true;
262+
}
263+
152264
/// Extract a word -- sequence of non-whitespace characters.
153265
bool lexWord(Token &Tok) {
154266
if (isEnd())
@@ -295,6 +407,7 @@ Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
295407
Comment::Argument[NumArgs];
296408
unsigned ParsedArgs = 0;
297409
Token Arg;
410+
298411
while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
299412
Args[ParsedArgs] = Comment::Argument{
300413
SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
@@ -304,6 +417,23 @@ Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
304417
return llvm::ArrayRef(Args, ParsedArgs);
305418
}
306419

420+
/// Parse up to \p NumArgs type arguments for a throw-style block command.
///
/// Each argument is lexed with TextTokenRetokenizer::lexDataType(). Parsing
/// stops at the first failure, so the returned array may hold fewer than
/// \p NumArgs entries.
ArrayRef<Comment::Argument>
Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
                              unsigned NumArgs) {
  auto *Parsed = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];
  Token TypeTok;
  unsigned Count = 0;

  for (; Count < NumArgs; ++Count) {
    if (!Retokenizer.lexDataType(TypeTok))
      break;
    const SourceRange Range(TypeTok.getLocation(), TypeTok.getEndLocation());
    Parsed[Count] = Comment::Argument{Range, TypeTok.getText()};
  }

  return llvm::ArrayRef(Parsed, Count);
}
436+
307437
BlockCommandComment *Parser::parseBlockCommand() {
308438
assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
309439

@@ -356,6 +486,9 @@ BlockCommandComment *Parser::parseBlockCommand() {
356486
parseParamCommandArgs(PC, Retokenizer);
357487
else if (TPC)
358488
parseTParamCommandArgs(TPC, Retokenizer);
489+
else if (Info->IsThrowsCommand)
490+
S.actOnBlockCommandArgs(
491+
BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs));
359492
else
360493
S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs));
361494

0 commit comments

Comments
 (0)