@@ -189,6 +189,13 @@ struct ExtractInactiveRanges : public ASTWalker {
189
189
};
190
190
} // end anonymous namespace
191
191
192
+ /// Appends the textual contents of the provided source range, stripping
193
+ /// the contents of comments that appear in the source.
194
+ ///
195
+ /// Given that comments are treated as whitespace, this also appends a
196
+ /// space or newline (depending on whether the comment was multi-line and itself
197
+ /// had newlines in the body) in place of the comment, to avoid fusing tokens
198
+ /// together.
192
199
static void appendRange (
193
200
SourceManager &sourceMgr, SourceLoc start, SourceLoc end,
194
201
SmallVectorImpl<char > &scratch) {
@@ -210,28 +217,44 @@ static void appendRange(
210
217
lexer.lex (token);
211
218
212
219
if (token.is (tok::comment)) {
213
- // Append the range from the last non-comment token to the beginning of this comment
214
- // token.
220
+ // Grab the start of the full comment token (with leading trivia as well)
215
221
SourceLoc commentLoc = token.getLoc ();
216
- auto charRange = CharSourceRange (sourceMgr, nonCommentStart, commentLoc);
217
- StringRef text = sourceMgr.extractText (charRange);
218
- scratch.append (text.begin (), text.end ());
219
222
220
- // Append a single whitespace character, to avoid fusing tokens.
221
- scratch.push_back (' ' );
222
-
223
- // Set the start of the next non-comment range to the end of this token.
223
+ // Find the end of the token (with trailing trivia)
224
224
SourceLoc endLoc = Lexer::getLocForEndOfToken (sourceMgr, token.getLoc ());
225
225
226
- // The comment token's end location includes trailing whitespace, so trim trailing
226
+ // The comment token's range includes leading/trailing whitespace, so trim
227
227
// whitespace and only strip the portions of the comment that are not whitespace.
228
228
CharSourceRange range = CharSourceRange (sourceMgr, commentLoc, endLoc);
229
- StringRef commentText = sourceMgr.extractText (range);
230
- unsigned whitespaceOffset = commentText.size () - commentText.rtrim ().size ();
231
- if (whitespaceOffset > 0 ) {
232
- endLoc = endLoc.getAdvancedLoc (-whitespaceOffset);
229
+ StringRef fullTokenText = sourceMgr.extractText (range);
230
+ unsigned leadingWhitespace = fullTokenText.size () - fullTokenText.ltrim ().size ();
231
+ if (leadingWhitespace > 0 ) {
232
+ commentLoc = commentLoc.getAdvancedLoc (leadingWhitespace);
233
+ }
234
+
235
+ unsigned trailingWhitespace = fullTokenText.size () - fullTokenText.rtrim ().size ();
236
+ if (trailingWhitespace > 0 ) {
237
+ endLoc = endLoc.getAdvancedLoc (-trailingWhitespace);
233
238
}
234
239
240
+ // First, extract the text up to the start of the comment, including the whitespace.
241
+ auto charRange = CharSourceRange (sourceMgr, nonCommentStart, commentLoc);
242
+ StringRef text = sourceMgr.extractText (charRange);
243
+ scratch.append (text.begin (), text.end ());
244
+
245
+ // Next, search through the comment text to see if it's a block comment with a newline. If so,
246
+ // we need to re-insert a newline to avoid fusing multi-line tokens together.
247
+ auto commentTextRange = CharSourceRange (sourceMgr, commentLoc, endLoc);
248
+ StringRef commentText = sourceMgr.extractText (commentTextRange);
249
+ bool hasNewline = commentText.find_first_of (" \n\r " ) != StringRef::npos;
250
+
251
+ // Use a newline as a filler character if the comment itself had a newline in it.
252
+ char filler = hasNewline ? ' \n ' : ' ' ;
253
+
254
+ // Append a single whitespace filler character, to avoid fusing tokens.
255
+ scratch.push_back (filler);
256
+
257
+ // Start the next region after the contents of the comment.
235
258
nonCommentStart = endLoc;
236
259
}
237
260
}
0 commit comments